diff --git a/clang/docs/StandardCPlusPlusModules.rst b/clang/docs/StandardCPlusPlusModules.rst index 7155ad6cff83f..47e9cf4a4c3db 100644 --- a/clang/docs/StandardCPlusPlusModules.rst +++ b/clang/docs/StandardCPlusPlusModules.rst @@ -1381,33 +1381,6 @@ declarations which use it. Thus, the preferred name will not be displayed in the debugger as expected. This is tracked by `#56490 `_. -Don't emit macros about module declaration -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -This is covered by `P1857R3 `_. It is mentioned here -because we want users to be aware that we don't yet implement it. - -A direct approach to write code that can be compiled by both modules and -non-module builds may look like: - -.. code-block:: c++ - - MODULE - IMPORT header_name - EXPORT_MODULE MODULE_NAME; - IMPORT header_name - EXPORT ... - -The intent of this is that this file can be compiled like a module unit or a -non-module unit depending on the definition of some macros. However, this usage -is forbidden by P1857R3 which is not yet implemented in Clang. This means that -is possible to write invalid modules which will no longer be accepted once -P1857R3 is implemented. This is tracked by -`#54047 `_. - -Until then, it is recommended not to mix macros with module declarations. 
- - Inconsistent filename suffix requirement for importable module units ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/clang/include/clang/Basic/DiagnosticLexKinds.td b/clang/include/clang/Basic/DiagnosticLexKinds.td index c7fe6e1db6d1f..2f4b335b0bf79 100644 --- a/clang/include/clang/Basic/DiagnosticLexKinds.td +++ b/clang/include/clang/Basic/DiagnosticLexKinds.td @@ -466,6 +466,9 @@ def err_pp_embed_device_file : Error< def ext_pp_extra_tokens_at_eol : ExtWarn< "extra tokens at end of #%0 directive">, InGroup; +def ext_pp_extra_tokens_at_module_directive_eol + : Warning<"extra tokens at end of '%0' directive">, + InGroup; def ext_pp_comma_expr : Extension<"comma operator in operand of #if">; def ext_pp_bad_vaargs_use : Extension< @@ -495,8 +498,8 @@ def warn_cxx98_compat_variadic_macro : Warning< InGroup, DefaultIgnore; def ext_named_variadic_macro : Extension< "named variadic macros are a GNU extension">, InGroup; -def err_embedded_directive : Error< - "embedding a #%0 directive within macro arguments is not supported">; +def err_embedded_directive : Error<"embedding a %select{#|C++ }0%1 directive " + "within macro arguments is not supported">; def ext_embedded_directive : Extension< "embedding a directive within macro arguments has undefined behavior">, InGroup>; @@ -986,6 +989,14 @@ def warn_module_conflict : Warning< InGroup; // C++20 modules +def err_pp_module_name_is_macro : Error< + "%select{module|partition}0 name component %1 cannot be a object-like macro">; +def err_pp_module_expected_ident : Error< + "expected %select{identifier after '.' 
in |}0module name">; +def err_pp_module_decl_in_header + : Error<"module declaration must not come from an #include directive">; +def err_pp_cond_span_module_decl + : Error<"preprocessor conditionals shall not span a module declaration">; def err_header_import_semi_in_macro : Error< "semicolon terminating header import declaration cannot be produced " "by a macro">; diff --git a/clang/include/clang/Basic/DiagnosticParseKinds.td b/clang/include/clang/Basic/DiagnosticParseKinds.td index 4d9e123eb4ef1..e6e573a0d75ce 100644 --- a/clang/include/clang/Basic/DiagnosticParseKinds.td +++ b/clang/include/clang/Basic/DiagnosticParseKinds.td @@ -1782,10 +1782,8 @@ def ext_bit_int : Extension< } // end of Parse Issue category. let CategoryName = "Modules Issue" in { -def err_unexpected_module_decl : Error< - "module declaration can only appear at the top level">; -def err_module_expected_ident : Error< - "expected a module name after '%select{module|import}0'">; +def err_unexpected_module_or_import_decl : Error< + "%select{module|import}0 declaration can only appear at the top level">; def err_attribute_not_module_attr : Error< "%0 attribute cannot be applied to a module">; def err_keyword_not_module_attr : Error< @@ -1796,6 +1794,10 @@ def err_keyword_not_import_attr : Error< "%0 cannot be applied to a module import">; def err_module_expected_semi : Error< "expected ';' after module name">; +def err_expected_semi_after_module_or_import + : Error<"%0 directive must end with a ';'">; +def note_module_declared_here : Note< + "%select{module|import}0 directive defined here">; def err_global_module_introducer_not_at_start : Error< "'module;' introducing a global module fragment can appear only " "at the start of the translation unit">; diff --git a/clang/include/clang/Basic/IdentifierTable.h b/clang/include/clang/Basic/IdentifierTable.h index e4044bcdfcc60..488c1bf9599a0 100644 --- a/clang/include/clang/Basic/IdentifierTable.h +++ b/clang/include/clang/Basic/IdentifierTable.h @@ 
-179,6 +179,10 @@ class alignas(IdentifierInfoAlignment) IdentifierInfo { LLVM_PREFERRED_TYPE(bool) unsigned IsModulesImport : 1; + // True if this is the 'module' contextual keyword. + LLVM_PREFERRED_TYPE(bool) + unsigned IsModulesDecl : 1; + // True if this is a mangled OpenMP variant name. LLVM_PREFERRED_TYPE(bool) unsigned IsMangledOpenMPVariantName : 1; @@ -215,8 +219,9 @@ class alignas(IdentifierInfoAlignment) IdentifierInfo { IsCPPOperatorKeyword(false), NeedsHandleIdentifier(false), IsFromAST(false), ChangedAfterLoad(false), FEChangedAfterLoad(false), RevertedTokenID(false), OutOfDate(false), IsModulesImport(false), - IsMangledOpenMPVariantName(false), IsDeprecatedMacro(false), - IsRestrictExpansion(false), IsFinal(false), IsKeywordInCpp(false) {} + IsModulesDecl(false), IsMangledOpenMPVariantName(false), + IsDeprecatedMacro(false), IsRestrictExpansion(false), IsFinal(false), + IsKeywordInCpp(false) {} public: IdentifierInfo(const IdentifierInfo &) = delete; @@ -528,6 +533,18 @@ class alignas(IdentifierInfoAlignment) IdentifierInfo { RecomputeNeedsHandleIdentifier(); } + /// Determine whether this is the contextual keyword \c module. + bool isModulesDeclaration() const { return IsModulesDecl; } + + /// Set whether this identifier is the contextual keyword \c module. + void setModulesDeclaration(bool I) { + IsModulesDecl = I; + if (I) + NeedsHandleIdentifier = true; + else + RecomputeNeedsHandleIdentifier(); + } + /// Determine whether this is the mangled name of an OpenMP variant. bool isMangledOpenMPVariantName() const { return IsMangledOpenMPVariantName; } @@ -745,10 +762,11 @@ class IdentifierTable { // contents. II->Entry = &Entry; - // If this is the 'import' contextual keyword, mark it as such. + // If this is the 'import' or 'module' contextual keyword, mark it as such. 
if (Name == "import") II->setModulesImport(true); - + else if (Name == "module") + II->setModulesDeclaration(true); return *II; } diff --git a/clang/include/clang/Basic/TokenKinds.def b/clang/include/clang/Basic/TokenKinds.def index 9d1a23d1af218..2e6421e8c019a 100644 --- a/clang/include/clang/Basic/TokenKinds.def +++ b/clang/include/clang/Basic/TokenKinds.def @@ -133,6 +133,11 @@ PPKEYWORD(pragma) // C23 & C++26 #embed PPKEYWORD(embed) +// C++20 Module Directive +PPKEYWORD(module) +PPKEYWORD(__preprocessed_module) +PPKEYWORD(__preprocessed_import) + // GNU Extensions. PPKEYWORD(import) PPKEYWORD(include_next) @@ -1027,6 +1032,9 @@ ANNOTATION(module_include) ANNOTATION(module_begin) ANNOTATION(module_end) +// Annotations for C++, Clang and Objective-C named modules. +ANNOTATION(module_name) + // Annotation for a header_name token that has been looked up and transformed // into the name of a header unit. ANNOTATION(header_unit) diff --git a/clang/include/clang/Basic/TokenKinds.h b/clang/include/clang/Basic/TokenKinds.h index d84f3598cbf33..f0d11d43bdf97 100644 --- a/clang/include/clang/Basic/TokenKinds.h +++ b/clang/include/clang/Basic/TokenKinds.h @@ -76,6 +76,10 @@ const char *getPunctuatorSpelling(TokenKind Kind) LLVM_READNONE; /// tokens like 'int' and 'dynamic_cast'. Returns NULL for other token kinds. const char *getKeywordSpelling(TokenKind Kind) LLVM_READNONE; +/// Determines the spelling of simple Objective-C keyword tokens like '@import'. +/// Returns NULL for other token kinds. +const char *getObjCKeywordSpelling(ObjCKeywordKind Kind) LLVM_READNONE; + /// Returns the spelling of preprocessor keywords, such as "else". 
const char *getPPKeywordSpelling(PPKeywordKind Kind) LLVM_READNONE; diff --git a/clang/include/clang/Frontend/CompilerInstance.h b/clang/include/clang/Frontend/CompilerInstance.h index a6b6993b708d0..637c2b75e6413 100644 --- a/clang/include/clang/Frontend/CompilerInstance.h +++ b/clang/include/clang/Frontend/CompilerInstance.h @@ -895,7 +895,7 @@ class CompilerInstance : public ModuleLoader { /// load it. ModuleLoadResult findOrCompileModuleAndReadAST(StringRef ModuleName, SourceLocation ImportLoc, - SourceLocation ModuleNameLoc, + SourceRange ModuleNameRange, bool IsInclusionDirective); /// Creates a \c CompilerInstance for compiling a module. diff --git a/clang/include/clang/Lex/CodeCompletionHandler.h b/clang/include/clang/Lex/CodeCompletionHandler.h index bd3e05a36bb33..2ef29743415ae 100644 --- a/clang/include/clang/Lex/CodeCompletionHandler.h +++ b/clang/include/clang/Lex/CodeCompletionHandler.h @@ -13,12 +13,15 @@ #ifndef LLVM_CLANG_LEX_CODECOMPLETIONHANDLER_H #define LLVM_CLANG_LEX_CODECOMPLETIONHANDLER_H +#include "clang/Basic/IdentifierTable.h" +#include "clang/Basic/SourceLocation.h" #include "llvm/ADT/StringRef.h" namespace clang { class IdentifierInfo; class MacroInfo; +using ModuleIdPath = ArrayRef; /// Callback handler that receives notifications when performing code /// completion within the preprocessor. @@ -70,6 +73,11 @@ class CodeCompletionHandler { /// file where we expect natural language, e.g., a comment, string, or /// \#error directive. virtual void CodeCompleteNaturalLanguage() { } + + /// Callback invoked when performing code completion inside the module name + /// part of an import directive. 
+ virtual void CodeCompleteModuleImport(SourceLocation ImportLoc, + ModuleIdPath Path) {} }; } diff --git a/clang/include/clang/Lex/DependencyDirectivesScanner.h b/clang/include/clang/Lex/DependencyDirectivesScanner.h index c0b742d652a03..a5d2e6220ac2f 100644 --- a/clang/include/clang/Lex/DependencyDirectivesScanner.h +++ b/clang/include/clang/Lex/DependencyDirectivesScanner.h @@ -142,6 +142,15 @@ void printDependencyDirectivesAsSource( /// \returns true if any C++20 named modules related directive was found. bool scanInputForCXX20ModulesUsage(StringRef Source); +/// Scan an input source buffer, and check whether the input source is a +/// preprocessed output. +/// +/// \param Source The input source buffer. +/// +/// \returns true if any '__preprocessed_module' or '__preprocessed_import' +/// directive was found. +bool isPreprocessedModuleFile(StringRef Source); + /// Functor that returns the dependency directives for a given file. class DependencyDirectivesGetter { public: diff --git a/clang/include/clang/Lex/ModuleLoader.h b/clang/include/clang/Lex/ModuleLoader.h index a58407200c41c..042a5ab1f4a57 100644 --- a/clang/include/clang/Lex/ModuleLoader.h +++ b/clang/include/clang/Lex/ModuleLoader.h @@ -159,6 +159,7 @@ class ModuleLoader { /// \returns Returns true if any modules with that symbol found.
virtual bool lookupMissingImports(StringRef Name, SourceLocation TriggerLoc) = 0; + static std::string getFlatNameFromPath(ModuleIdPath Path); bool HadFatalFailure = false; }; diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h index 39754847a93e4..08e04b5ec2416 100644 --- a/clang/include/clang/Lex/Preprocessor.h +++ b/clang/include/clang/Lex/Preprocessor.h @@ -48,6 +48,7 @@ #include "llvm/Support/Allocator.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Registry.h" +#include "llvm/Support/TrailingObjects.h" #include #include #include @@ -136,6 +137,64 @@ struct CXXStandardLibraryVersionInfo { std::uint64_t Version; }; +/// Record the previous 'export' keyword info. +/// +/// Since P1857R3, the standard introduced several rules to determine whether +/// 'module', 'export module', 'import', or 'export import' is a valid +/// directive introducer. This class is used to record the previous 'export' +/// keyword token, and then handle 'export module' and 'export import'.
+class ExportContextualKeywordInfo { + Token ExportTok; + bool AtPhysicalStartOfLine = false; + +public: + ExportContextualKeywordInfo() = default; + ExportContextualKeywordInfo(const Token &Tok, bool AtPhysicalStartOfLine) + : ExportTok(Tok), AtPhysicalStartOfLine(AtPhysicalStartOfLine) {} + + bool isValid() const { return ExportTok.is(tok::kw_export); } + bool isAtPhysicalStartOfLine() const { return AtPhysicalStartOfLine; } + Token getExportTok() const { return ExportTok; } + void reset() { + ExportTok.startToken(); + AtPhysicalStartOfLine = false; + } +}; + +class ModuleNameLoc final + : llvm::TrailingObjects { + friend TrailingObjects; + unsigned NumIdentifierLocs; + unsigned numTrailingObjects(OverloadToken) const { + return getNumIdentifierLocs(); + } + + ModuleNameLoc(ModuleIdPath Path) : NumIdentifierLocs(Path.size()) { + (void)llvm::copy(Path, getTrailingObjectsNonStrict()); + } + +public: + static ModuleNameLoc *Create(Preprocessor &PP, ModuleIdPath Path); + unsigned getNumIdentifierLocs() const { return NumIdentifierLocs; } + ModuleIdPath getModuleIdPath() const { + return {getTrailingObjectsNonStrict(), + getNumIdentifierLocs()}; + } + + SourceLocation getBeginLoc() const { + return getModuleIdPath().front().getLoc(); + } + SourceLocation getEndLoc() const { + auto &Last = getModuleIdPath().back(); + return Last.getLoc().getLocWithOffset( + Last.getIdentifierInfo()->getLength()); + } + SourceRange getRange() const { return {getBeginLoc(), getEndLoc()}; } + std::string str() const { + return ModuleLoader::getFlatNameFromPath(getModuleIdPath()); + } +}; + /// Engages in a tight little dance with the lexer to efficiently /// preprocess tokens. /// @@ -339,8 +398,9 @@ class Preprocessor { /// lexed, if any. SourceLocation ModuleImportLoc; - /// The import path for named module that we're currently processing. - SmallVector NamedModuleImportPath; + /// The source location of the \c module contextual keyword we just + /// lexed, if any. 
+ SourceLocation ModuleDeclLoc; llvm::DenseMap> CheckPoints; unsigned CheckPointCounter = 0; @@ -351,6 +411,12 @@ class Preprocessor { /// Whether the last token we lexed was an '@'. bool LastTokenWasAt = false; + /// Whether we're importing a standard C++20 named Modules. + bool ImportingCXXNamedModules = false; + + /// Whether the last token we lexed was an 'export' keyword. + ExportContextualKeywordInfo LastTokenWasExportKeyword; + /// First pp-token source location in current translation unit. SourceLocation FirstPPTokenLoc; @@ -562,9 +628,9 @@ class Preprocessor { reset(); } - void handleIdentifier(IdentifierInfo *Identifier) { - if (isModuleCandidate() && Identifier) - Name += Identifier->getName().str(); + void handleModuleName(ModuleNameLoc *NameLoc) { + if (isModuleCandidate() && NameLoc) + Name += NameLoc->str(); else if (!isNamedModule()) reset(); } @@ -576,13 +642,6 @@ class Preprocessor { reset(); } - void handlePeriod() { - if (isModuleCandidate()) - Name += "."; - else if (!isNamedModule()) - reset(); - } - void handleSemi() { if (!Name.empty() && isModuleCandidate()) { if (State == InterfaceCandidate) @@ -639,10 +698,6 @@ class Preprocessor { ModuleDeclSeq ModuleDeclState; - /// Whether the module import expects an identifier next. Otherwise, - /// it expects a '.' or ';'. - bool ModuleImportExpectsIdentifier = false; - /// The identifier and source location of the currently-active /// \#pragma clang arc_cf_code_audited begin. IdentifierLoc PragmaARCCFCodeAuditedInfo; @@ -1125,6 +1180,9 @@ class Preprocessor { /// Whether tokens are being skipped until the through header is seen. bool SkippingUntilPCHThroughHeader = false; + /// Whether the main file is preprocessed module file. + bool MainFileIsPreprocessedModuleFile = false; + /// \{ /// Cache of macro expanders to reduce malloc traffic. 
enum { TokenLexerCacheSize = 8 }; @@ -1777,6 +1835,29 @@ class Preprocessor { std::optional LexEmbedParameters(Token &Current, bool ForHasEmbed); + /// Whether the main file is preprocessed module file. + bool isPreprocessedModuleFile() const { + return MainFileIsPreprocessedModuleFile; + } + + bool LexModuleNameContinue(Token &Tok, SourceLocation UseLoc, + SmallVectorImpl &Suffix, + SmallVectorImpl &Path, + bool AllowMacroExpansion = true, + bool IsPartition = false); + void EnterModuleSuffixTokenStream(ArrayRef Toks); + void HandleCXXImportDirective(Token Import); + void HandleCXXModuleDirective(Token Module); + + /// Callback invoked when the lexer sees one of export, import or module token + /// at the start of a line. + /// + /// This consumes the import/module directive, modifies the + /// lexer/preprocessor state, and advances the lexer(s) so that the next token + /// read is the correct one. + bool HandleModuleContextualKeyword(Token &Result, + bool TokAtPhysicalStartOfLine); + /// Get the start location of the first pp-token in main file. SourceLocation getMainFileFirstPPTokenLoc() const { assert(FirstPPTokenLoc.isValid() && @@ -1785,7 +1866,10 @@ class Preprocessor { } bool LexAfterModuleImport(Token &Result); - void CollectPpImportSuffix(SmallVectorImpl &Toks); + void CollectPPImportSuffix(SmallVectorImpl &Toks, + bool StopUntilEOD = false); + bool CollectPPImportSuffixAndEnterStream(SmallVectorImpl &Toks, + bool StopUntilEOD = false); void makeModuleVisible(Module *M, SourceLocation Loc, bool IncludeExports = true); @@ -2401,20 +2485,27 @@ class Preprocessor { /// If \p EnableMacros is true, then we consider macros that expand to zero /// tokens as being ok. /// + /// If \p ExtraToks not null, the extra tokens will be saved in this + /// container. + /// /// \return The location of the end of the directive (the terminating /// newline). 
- SourceLocation CheckEndOfDirective(const char *DirType, - bool EnableMacros = false); + SourceLocation + CheckEndOfDirective(StringRef DirType, bool EnableMacros = false, + SmallVectorImpl *ExtraToks = nullptr); /// Read and discard all tokens remaining on the current line until /// the tok::eod token is found. Returns the range of the skipped tokens. - SourceRange DiscardUntilEndOfDirective() { + SourceRange + DiscardUntilEndOfDirective(SmallVectorImpl *DiscardedToks = nullptr) { Token Tmp; - return DiscardUntilEndOfDirective(Tmp); + return DiscardUntilEndOfDirective(Tmp, DiscardedToks); } /// Same as above except retains the token that was found. - SourceRange DiscardUntilEndOfDirective(Token &Tok); + SourceRange + DiscardUntilEndOfDirective(Token &Tok, + SmallVectorImpl *DiscardedToks = nullptr); /// Returns true if the preprocessor has seen a use of /// __DATE__ or __TIME__ in the file so far. @@ -2485,11 +2576,10 @@ class Preprocessor { } /// If we're importing a standard C++20 Named Modules. - bool isInImportingCXXNamedModules() const { - // NamedModuleImportPath will be non-empty only if we're importing - // Standard C++ named modules. - return !NamedModuleImportPath.empty() && getLangOpts().CPlusPlusModules && - !IsAtImport; + bool isImportingCXXNamedModules() const { + assert(getLangOpts().CPlusPlusModules && + "Import C++ named modules are only valid for C++20 modules"); + return ImportingCXXNamedModules; } /// Allocate a new MacroInfo object with the provided SourceLocation. diff --git a/clang/include/clang/Lex/Token.h b/clang/include/clang/Lex/Token.h index d9dc5a562d802..ae9ab99cf00b1 100644 --- a/clang/include/clang/Lex/Token.h +++ b/clang/include/clang/Lex/Token.h @@ -291,6 +291,11 @@ class Token { /// Return the ObjC keyword kind. tok::ObjCKeywordKind getObjCKeywordID() const; + /// Return true if we have a C++20 Modules contextual keyword (export, import + /// or module).
+ bool isModuleContextualKeyword(const LangOptions &LangOpts, + bool AllowExport = true) const; + bool isSimpleTypeSpecifier(const LangOptions &LangOpts) const; /// Return true if this token has trigraphs or escaped newlines in it. diff --git a/clang/include/clang/Lex/TokenLexer.h b/clang/include/clang/Lex/TokenLexer.h index 0456dd961fc30..8f733dc088603 100644 --- a/clang/include/clang/Lex/TokenLexer.h +++ b/clang/include/clang/Lex/TokenLexer.h @@ -100,6 +100,10 @@ class TokenLexer { /// See the flag documentation for details. bool IsReinject : 1; + /// This is true if this TokenLexer is created when handling C++ module + /// directive. + bool LexingCXXModuleDirective : 1; + public: /// Create a TokenLexer for the specified macro with the specified actual /// arguments. Note that this ctor takes ownership of the ActualArgs pointer. @@ -151,6 +155,14 @@ class TokenLexer { /// preprocessor directive. bool isParsingPreprocessorDirective() const; + /// setLexingCXXModuleDirective - This is set to true if this TokenLexer is + /// created when handling C++ module directive. + void setLexingCXXModuleDirective(bool Val = true); + + /// isLexingCXXModuleDirective - Return true if we are lexing a C++ module or + /// import directive. + bool isLexingCXXModuleDirective() const; + private: void destroy(); diff --git a/clang/include/clang/Parse/Parser.h b/clang/include/clang/Parse/Parser.h index 30edd303e1824..4fdb6a8d461fe 100644 --- a/clang/include/clang/Parse/Parser.h +++ b/clang/include/clang/Parse/Parser.h @@ -566,10 +566,6 @@ class Parser : public CodeCompletionHandler { /// Contextual keywords for Microsoft extensions. IdentifierInfo *Ident__except; - // C++2a contextual keywords. 
- mutable IdentifierInfo *Ident_import; - mutable IdentifierInfo *Ident_module; - std::unique_ptr CommentSemaHandler; /// Gets set to true after calling ProduceSignatureHelp, it is for a @@ -1081,6 +1077,9 @@ class Parser : public CodeCompletionHandler { bool ParseModuleName(SourceLocation UseLoc, SmallVectorImpl &Path, bool IsImport); + void DiagnoseInvalidCXXModuleDecl(const Sema::ModuleImportState &ImportState); + void DiagnoseInvalidCXXModuleImport(); + //===--------------------------------------------------------------------===// // Preprocessor code-completion pass-through void CodeCompleteDirective(bool InConditional) override; @@ -1091,6 +1090,8 @@ class Parser : public CodeCompletionHandler { unsigned ArgumentIndex) override; void CodeCompleteIncludedFile(llvm::StringRef Dir, bool IsAngled) override; void CodeCompleteNaturalLanguage() override; + void CodeCompleteModuleImport(SourceLocation ImportLoc, + ModuleIdPath Path) override; ///@} diff --git a/clang/lib/Basic/IdentifierTable.cpp b/clang/lib/Basic/IdentifierTable.cpp index 4a2b77cd16bfc..3088acef21497 100644 --- a/clang/lib/Basic/IdentifierTable.cpp +++ b/clang/lib/Basic/IdentifierTable.cpp @@ -349,8 +349,11 @@ void IdentifierTable::AddKeywords(const LangOptions &LangOpts) { if (LangOpts.IEEE128) AddKeyword("__ieee128", tok::kw___float128, KEYALL, LangOpts, *this); - // Add the 'import' contextual keyword. + // Add the 'import' and 'module' contextual keywords. 
get("import").setModulesImport(true); + get("module").setModulesDeclaration(true); + get("__preprocessed_import").setModulesImport(true); + get("__preprocessed_module").setModulesDeclaration(true); } /// Checks if the specified token kind represents a keyword in the @@ -464,6 +467,13 @@ tok::PPKeywordKind IdentifierInfo::getPPKeywordID() const { unsigned Len = getLength(); if (Len < 2) return tok::pp_not_keyword; const char *Name = getNameStart(); + + if (Name[0] == '_' && isModulesImport()) + return tok::pp___preprocessed_import; + if (Name[0] == '_' && isModulesDeclaration()) + return tok::pp___preprocessed_module; + + // clang-format off switch (HASH(Len, Name[0], Name[2])) { default: return tok::pp_not_keyword; CASE( 2, 'i', '\0', if); @@ -482,6 +492,7 @@ tok::PPKeywordKind IdentifierInfo::getPPKeywordID() const { CASE( 6, 'd', 'f', define); CASE( 6, 'i', 'n', ifndef); CASE( 6, 'i', 'p', import); + CASE( 6, 'm', 'd', module); CASE( 6, 'p', 'a', pragma); CASE( 7, 'd', 'f', defined); @@ -501,6 +512,7 @@ tok::PPKeywordKind IdentifierInfo::getPPKeywordID() const { #undef CASE #undef HASH } + // clang-format on } //===----------------------------------------------------------------------===// diff --git a/clang/lib/Basic/TokenKinds.cpp b/clang/lib/Basic/TokenKinds.cpp index c300175ce90ba..a5b8c998d9b8e 100644 --- a/clang/lib/Basic/TokenKinds.cpp +++ b/clang/lib/Basic/TokenKinds.cpp @@ -46,6 +46,18 @@ const char *tok::getKeywordSpelling(TokenKind Kind) { return nullptr; } +const char *tok::getObjCKeywordSpelling(ObjCKeywordKind Kind) { + switch (Kind) { +#define OBJC_AT_KEYWORD(X) \ + case objc_##X: \ + return "@" #X; +#include "clang/Basic/TokenKinds.def" + default: + break; + } + return nullptr; +} + const char *tok::getPPKeywordSpelling(tok::PPKeywordKind Kind) { switch (Kind) { #define PPKEYWORD(x) case tok::pp_##x: return #x; diff --git a/clang/lib/Frontend/CompilerInstance.cpp b/clang/lib/Frontend/CompilerInstance.cpp index d6f3aec981336..44c6a0eae185f 100644 
--- a/clang/lib/Frontend/CompilerInstance.cpp +++ b/clang/lib/Frontend/CompilerInstance.cpp @@ -1830,8 +1830,8 @@ static ModuleSource selectModuleSource( } ModuleLoadResult CompilerInstance::findOrCompileModuleAndReadAST( - StringRef ModuleName, SourceLocation ImportLoc, - SourceLocation ModuleNameLoc, bool IsInclusionDirective) { + StringRef ModuleName, SourceLocation ImportLoc, SourceRange ModuleNameRange, + bool IsInclusionDirective) { // Search for a module with the given name. HeaderSearch &HS = PP->getHeaderSearchInfo(); Module *M = @@ -1848,10 +1848,11 @@ ModuleLoadResult CompilerInstance::findOrCompileModuleAndReadAST( std::string ModuleFilename; ModuleSource Source = selectModuleSource(M, ModuleName, ModuleFilename, BuiltModules, HS); + SourceLocation ModuleNameLoc = ModuleNameRange.getBegin(); if (Source == MS_ModuleNotFound) { // We can't find a module, error out here. getDiagnostics().Report(ModuleNameLoc, diag::err_module_not_found) - << ModuleName << SourceRange(ImportLoc, ModuleNameLoc); + << ModuleName << ModuleNameRange; return nullptr; } if (ModuleFilename.empty()) { @@ -2037,8 +2038,11 @@ CompilerInstance::loadModule(SourceLocation ImportLoc, MM.cacheModuleLoad(*Path[0].getIdentifierInfo(), Module); } else { + SourceLocation ModuleNameEndLoc = Path.back().getLoc().getLocWithOffset( + Path.back().getIdentifierInfo()->getLength()); ModuleLoadResult Result = findOrCompileModuleAndReadAST( - ModuleName, ImportLoc, ModuleNameLoc, IsInclusionDirective); + ModuleName, ImportLoc, SourceRange{ModuleNameLoc, ModuleNameEndLoc}, + IsInclusionDirective); if (!Result.isNormal()) return Result; if (!Result) diff --git a/clang/lib/Frontend/PrintPreprocessedOutput.cpp b/clang/lib/Frontend/PrintPreprocessedOutput.cpp index 9e046633328d7..0dc8a86e604d3 100644 --- a/clang/lib/Frontend/PrintPreprocessedOutput.cpp +++ b/clang/lib/Frontend/PrintPreprocessedOutput.cpp @@ -245,6 +245,8 @@ class PrintPPOutputPPCallbacks : public PPCallbacks { unsigned GetNumToksToSkip() 
const { return NumToksToSkip; } void ResetSkipToks() { NumToksToSkip = 0; } + + const Token &GetPrevToken() const { return PrevTok; } }; } // end anonymous namespace @@ -758,7 +760,8 @@ void PrintPPOutputPPCallbacks::HandleWhitespaceBeforeTok(const Token &Tok, if (Tok.is(tok::eof) || (Tok.isAnnotation() && !Tok.is(tok::annot_header_unit) && !Tok.is(tok::annot_module_begin) && !Tok.is(tok::annot_module_end) && - !Tok.is(tok::annot_repl_input_end) && !Tok.is(tok::annot_embed))) + !Tok.is(tok::annot_repl_input_end) && !Tok.is(tok::annot_embed) && + !Tok.is(tok::annot_module_name))) return; // EmittedDirectiveOnThisLine takes priority over RequireSameLine. @@ -893,6 +896,7 @@ static void PrintPreprocessedTokens(Preprocessor &PP, Token &Tok, !PP.getCommentRetentionState(); bool IsStartOfLine = false; + bool IsCXXModuleDirective = false; char Buffer[256]; while (true) { // Two lines joined with line continuation ('\' as last character on the @@ -978,11 +982,38 @@ static void PrintPreprocessedTokens(Preprocessor &PP, Token &Tok, *Callbacks->OS << static_cast(Byte); PrintComma = true; } + } else if (Tok.is(tok::annot_module_name)) { + auto *NameLoc = static_cast(Tok.getAnnotationValue()); + *Callbacks->OS << NameLoc->str(); } else if (Tok.isAnnotation()) { // Ignore annotation tokens created by pragmas - the pragmas themselves // will be reproduced in the preprocessed output. 
PP.Lex(Tok); continue; + } else if (PP.getLangOpts().CPlusPlusModules && Tok.is(tok::kw_import) && + !Callbacks->GetPrevToken().is(tok::at)) { + assert(!IsCXXModuleDirective && "Is an import directive being printed?"); + IsCXXModuleDirective = true; + IsStartOfLine = false; + *Callbacks->OS << tok::getPPKeywordSpelling( + tok::pp___preprocessed_import); + PP.Lex(Tok); + continue; + } else if (PP.getLangOpts().CPlusPlusModules && Tok.is(tok::kw_module)) { + assert(!IsCXXModuleDirective && "Is an module directive being printed?"); + IsCXXModuleDirective = true; + IsStartOfLine = false; + *Callbacks->OS << tok::getPPKeywordSpelling( + tok::pp___preprocessed_module); + PP.Lex(Tok); + continue; + } else if (PP.getLangOpts().CPlusPlusModules && IsCXXModuleDirective && + Tok.is(tok::semi)) { + IsCXXModuleDirective = false; + IsStartOfLine = true; + *Callbacks->OS << ';'; + PP.Lex(Tok); + continue; } else if (IdentifierInfo *II = Tok.getIdentifierInfo()) { *Callbacks->OS << II->getName(); } else if (Tok.isLiteral() && !Tok.needsCleaning() && diff --git a/clang/lib/Lex/DependencyDirectivesScanner.cpp b/clang/lib/Lex/DependencyDirectivesScanner.cpp index eee57c786442a..974816107960a 100644 --- a/clang/lib/Lex/DependencyDirectivesScanner.cpp +++ b/clang/lib/Lex/DependencyDirectivesScanner.cpp @@ -84,6 +84,7 @@ struct Scanner { bool scan(SmallVectorImpl &Directives); friend bool clang::scanInputForCXX20ModulesUsage(StringRef Source); + friend bool clang::isPreprocessedModuleFile(StringRef Source); private: /// Lexes next token and advances \p First and the \p Lexer. @@ -174,6 +175,7 @@ struct Scanner { /// true at the end. 
bool reportError(const char *CurPtr, unsigned Err); + bool ScanningPreprocessedModuleFile = false; StringMap SplitIds; StringRef Input; SmallVectorImpl &Tokens; @@ -544,6 +546,12 @@ static void skipWhitespace(const char *&First, const char *const End) { bool Scanner::lexModuleDirectiveBody(DirectiveKind Kind, const char *&First, const char *const End) { + assert(Kind == DirectiveKind::cxx_export_import_decl || + Kind == DirectiveKind::cxx_export_module_decl || + Kind == DirectiveKind::cxx_import_decl || + Kind == DirectiveKind::cxx_module_decl || + Kind == DirectiveKind::decl_at_import); + const char *DirectiveLoc = Input.data() + CurDirToks.front().Offset; for (;;) { // Keep a copy of the First char incase it needs to be reset. @@ -555,7 +563,7 @@ bool Scanner::lexModuleDirectiveBody(DirectiveKind Kind, const char *&First, First = Previous; return false; } - if (Tok.is(tok::eof)) + if (Tok.isOneOf(tok::eof, tok::eod)) return reportError( DirectiveLoc, diag::err_dep_source_scanner_missing_semi_after_at_import); @@ -563,12 +571,25 @@ bool Scanner::lexModuleDirectiveBody(DirectiveKind Kind, const char *&First, break; } - const auto &Tok = lexToken(First, End); + bool IsCXXModules = Kind == DirectiveKind::cxx_export_import_decl || + Kind == DirectiveKind::cxx_export_module_decl || + Kind == DirectiveKind::cxx_import_decl || + Kind == DirectiveKind::cxx_module_decl; + if (IsCXXModules) { + lexPPDirectiveBody(First, End); + pushDirective(Kind); + return false; + } + pushDirective(Kind); - if (Tok.is(tok::eof) || Tok.is(tok::eod)) + skipWhitespace(First, End); + if (First == End) return false; - return reportError(DirectiveLoc, - diag::err_dep_source_scanner_unexpected_tokens_at_import); + if (!isVerticalWhitespace(*First)) + return reportError( + DirectiveLoc, diag::err_dep_source_scanner_unexpected_tokens_at_import); + skipNewline(First, End); + return false; } dependency_directives_scan::Token &Scanner::lexToken(const char *&First, @@ -705,7 +726,12 @@ bool 
Scanner::lexModule(const char *&First, const char *const End) { Id = *NextId; } - if (Id != "module" && Id != "import") { + StringRef Module = + ScanningPreprocessedModuleFile ? "__preprocessed_module" : "module"; + StringRef Import = + ScanningPreprocessedModuleFile ? "__preprocessed_import" : "import"; + + if (Id != Module && Id != Import) { skipLine(First, End); return false; } @@ -718,7 +744,7 @@ bool Scanner::lexModule(const char *&First, const char *const End) { switch (*First) { case ':': { // `module :` is never the start of a valid module declaration. - if (Id == "module") { + if (Id == Module) { skipLine(First, End); return false; } @@ -737,7 +763,7 @@ bool Scanner::lexModule(const char *&First, const char *const End) { } case ';': { // Handle the global module fragment `module;`. - if (Id == "module" && !Export) + if (Id == Module && !Export) break; skipLine(First, End); return false; @@ -755,7 +781,7 @@ bool Scanner::lexModule(const char *&First, const char *const End) { TheLexer.seek(getOffsetAt(First), /*IsAtStartOfLine*/ false); DirectiveKind Kind; - if (Id == "module") + if (Id == Module) Kind = Export ? cxx_export_module_decl : cxx_module_decl; else Kind = Export ? cxx_export_import_decl : cxx_import_decl; @@ -888,6 +914,19 @@ static bool isStartOfRelevantLine(char First) { return false; } +static inline bool isStartWithPreprocessedModuleDirective(const char *First, + const char *End) { + assert(First <= End); + if (*First == '_') { + StringRef Str(First, End - First); + return Str.starts_with( + tok::getPPKeywordSpelling(tok::pp___preprocessed_module)) || + Str.starts_with( + tok::getPPKeywordSpelling(tok::pp___preprocessed_import)); + } + return false; +} + bool Scanner::lexPPLine(const char *&First, const char *const End) { assert(First != End); @@ -912,7 +951,13 @@ bool Scanner::lexPPLine(const char *&First, const char *const End) { CurDirToks.clear(); }); - if (*First == '_') { + // FIXME: Should we handle @import as a preprocessing directive? 
+ if (*First == '@') + return lexAt(First, End); + + bool IsPreprocessedModule = + isStartWithPreprocessedModuleDirective(First, End); + if (*First == '_' && !IsPreprocessedModule) { if (isNextIdentifierOrSkipLine("_Pragma", First, End)) return lex_Pragma(First, End); return false; @@ -924,12 +969,8 @@ bool Scanner::lexPPLine(const char *&First, const char *const End) { auto ScEx2 = make_scope_exit( [&]() { TheLexer.setParsingPreprocessorDirective(false); }); - // Handle "@import". - if (*First == '@') - return lexAt(First, End); - // Handle module directives for C++20 modules. - if (*First == 'i' || *First == 'e' || *First == 'm') + if (*First == 'i' || *First == 'e' || *First == 'm' || IsPreprocessedModule) return lexModule(First, End); // Lex '#'. @@ -1011,6 +1052,7 @@ bool Scanner::scanImpl(const char *First, const char *const End) { } bool Scanner::scan(SmallVectorImpl &Directives) { + ScanningPreprocessedModuleFile = clang::isPreprocessedModuleFile(Input); bool Error = scanImpl(Input.begin(), Input.end()); if (!Error) { @@ -1125,3 +1167,45 @@ bool clang::scanInputForCXX20ModulesUsage(StringRef Source) { }; return llvm::any_of(S.DirsWithToks, IsCXXNamedModuleDirective); } + +bool clang::isPreprocessedModuleFile(StringRef Source) { + const char *First = Source.begin(); + const char *const End = Source.end(); + + skipUntilMaybeCXX20ModuleDirective(First, End); + if (First == End) + return false; + + llvm::SmallVector Tokens; + Scanner S(StringRef(First, End - First), Tokens, nullptr, SourceLocation()); + while (First != End) { + if (*First == '#') { + ++First; + skipToNewlineRaw(First, End); + } else if (*First == 'e') { + S.TheLexer.seek(S.getOffsetAt(First), /*IsAtStartOfLine=*/true); + StringRef Id = S.lexIdentifier(First, End); + if (Id == "export") { + std::optional NextId = + S.tryLexIdentifierOrSkipLine(First, End); + if (!NextId) + return false; + Id = *NextId; + } + if (Id == "__preprocessed_module" || Id == "__preprocessed_import") + return true; + 
skipToNewlineRaw(First, End); + } else if (isStartWithPreprocessedModuleDirective(First, End)) + return true; + else + skipToNewlineRaw(First, End); + + skipWhitespace(First, End); + if (const auto Len = isEOL(First, End)) { + First += Len; + continue; + } + break; + } + return false; +} diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp index b282a600c0e56..07f295936f46a 100644 --- a/clang/lib/Lex/Lexer.cpp +++ b/clang/lib/Lex/Lexer.cpp @@ -73,6 +73,22 @@ tok::ObjCKeywordKind Token::getObjCKeywordID() const { return specId ? specId->getObjCKeywordID() : tok::objc_not_keyword; } +/// Return true if we have a C++20 Modules contextual keyword (export, import +/// or module). +bool Token::isModuleContextualKeyword(const LangOptions &LangOpts, + bool AllowExport) const { + if (!LangOpts.CPlusPlusModules) + return false; + if (AllowExport && is(tok::kw_export)) + return true; + if (isOneOf(tok::kw_import, tok::kw_module)) + return true; + if (isNot(tok::identifier)) + return false; + const auto *II = getIdentifierInfo(); + return II->isModulesImport() || II->isModulesDeclaration(); +} + /// Determine whether the token kind starts a simple-type-specifier. bool Token::isSimpleTypeSpecifier(const LangOptions &LangOpts) const { switch (getKind()) { @@ -4020,11 +4036,21 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) { case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': case 's': case 't': /*'u'*/ case 'v': case 'w': case 'x': case 'y': case 'z': - case '_': + case '_': { // Notify MIOpt that we read a non-whitespace/non-comment token. MIOpt.ReadToken(); - return LexIdentifierContinue(Result, CurPtr); - + bool returnedToken = LexIdentifierContinue(Result, CurPtr); + + // Check eof token first, because EOF may be encountered in + // LexIdentifierContinue, and the current lexer may then be made invalid by + // HandleEndOfFile. 
+ if (returnedToken && Result.isNot(tok::eof) && + Result.isModuleContextualKeyword(LangOpts) && !LexingRawMode && + !Is_PragmaLexer && !ParsingPreprocessorDirective && PP && + PP->HandleModuleContextualKeyword(Result, TokAtPhysicalStartOfLine)) + goto HandleDirective; + return returnedToken; + } case '$': // $ in identifiers. if (LangOpts.DollarIdents) { if (!isLexingRawMode()) @@ -4227,8 +4253,12 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) { // it's actually the start of a preprocessing directive. Callback to // the preprocessor to handle it. // TODO: -fpreprocessed mode?? - if (TokAtPhysicalStartOfLine && !LexingRawMode && !Is_PragmaLexer) + if (TokAtPhysicalStartOfLine && !LexingRawMode && !Is_PragmaLexer) { + // We parsed a # character and it's the start of a preprocessing + // directive. + FormTokenWithChars(Result, CurPtr, tok::hash); goto HandleDirective; + } Kind = tok::hash; } @@ -4415,8 +4445,12 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) { // it's actually the start of a preprocessing directive. Callback to // the preprocessor to handle it. // TODO: -fpreprocessed mode?? - if (TokAtPhysicalStartOfLine && !LexingRawMode && !Is_PragmaLexer) + if (TokAtPhysicalStartOfLine && !LexingRawMode && !Is_PragmaLexer) { + // We parsed a # character and it's the start of a preprocessing + // directive. + FormTokenWithChars(Result, CurPtr, tok::hash); goto HandleDirective; + } Kind = tok::hash; } @@ -4506,9 +4540,6 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) { return true; HandleDirective: - // We parsed a # character and it's the start of a preprocessing directive. 
- - FormTokenWithChars(Result, CurPtr, tok::hash); PP->HandleDirective(Result); if (PP->hadModuleLoaderFatalFailure()) @@ -4531,6 +4562,10 @@ const char *Lexer::convertDependencyDirectiveToken( Result.setKind(DDTok.Kind); Result.setFlag((Token::TokenFlags)DDTok.Flags); Result.setLength(DDTok.Length); + if (Result.is(tok::raw_identifier)) + Result.setRawIdentifierData(TokPtr); + else if (Result.isLiteral()) + Result.setLiteralData(TokPtr); BufferPtr = TokPtr + DDTok.Length; return TokPtr; } @@ -4588,15 +4623,18 @@ bool Lexer::LexDependencyDirectiveToken(Token &Result) { Result.setRawIdentifierData(TokPtr); if (!isLexingRawMode()) { const IdentifierInfo *II = PP->LookUpIdentifierInfo(Result); + if (Result.isModuleContextualKeyword(LangOpts) && + PP->HandleModuleContextualKeyword(Result, Result.isAtStartOfLine())) { + PP->HandleDirective(Result); + return false; + } if (II->isHandleIdentifierCase()) return PP->HandleIdentifier(Result); } return true; } - if (Result.isLiteral()) { - Result.setLiteralData(TokPtr); + if (Result.isLiteral()) return true; - } if (Result.is(tok::colon)) { // Convert consecutive colons to 'tok::coloncolon'. if (*BufferPtr == ':') { diff --git a/clang/lib/Lex/PPDirectives.cpp b/clang/lib/Lex/PPDirectives.cpp index 360593d0f33df..461bc4a14d6e7 100644 --- a/clang/lib/Lex/PPDirectives.cpp +++ b/clang/lib/Lex/PPDirectives.cpp @@ -82,14 +82,19 @@ Preprocessor::AllocateVisibilityMacroDirective(SourceLocation Loc, /// Read and discard all tokens remaining on the current line until /// the tok::eod token is found. 
-SourceRange Preprocessor::DiscardUntilEndOfDirective(Token &Tmp) { +SourceRange Preprocessor::DiscardUntilEndOfDirective( + Token &Tmp, SmallVectorImpl *DiscardedToks) { SourceRange Res; - - LexUnexpandedToken(Tmp); + auto ReadNextTok = [&]() { + LexUnexpandedToken(Tmp); + if (DiscardedToks && Tmp.isNot(tok::eod)) + DiscardedToks->push_back(Tmp); + }; + ReadNextTok(); Res.setBegin(Tmp.getLocation()); while (Tmp.isNot(tok::eod)) { assert(Tmp.isNot(tok::eof) && "EOF seen while discarding directive tokens"); - LexUnexpandedToken(Tmp); + ReadNextTok(); } Res.setEnd(Tmp.getLocation()); return Res; @@ -439,21 +444,27 @@ void Preprocessor::ReadMacroName(Token &MacroNameTok, MacroUse isDefineUndef, /// true, then we consider macros that expand to zero tokens as being ok. /// /// Returns the location of the end of the directive. -SourceLocation Preprocessor::CheckEndOfDirective(const char *DirType, - bool EnableMacros) { +SourceLocation +Preprocessor::CheckEndOfDirective(StringRef DirType, bool EnableMacros, + SmallVectorImpl *ExtraToks) { Token Tmp; + auto ReadNextTok = [this, ExtraToks, &Tmp](auto &&LexFn) { + std::invoke(LexFn, this, Tmp); + if (ExtraToks && Tmp.isNot(tok::eod)) + ExtraToks->push_back(Tmp); + }; // Lex unexpanded tokens for most directives: macros might expand to zero // tokens, causing us to miss diagnosing invalid lines. Some directives (like // #line) allow empty macros. if (EnableMacros) - Lex(Tmp); + ReadNextTok(&Preprocessor::Lex); else - LexUnexpandedToken(Tmp); + ReadNextTok(&Preprocessor::LexUnexpandedToken); // There should be no tokens after the directive, but we allow them as an // extension. while (Tmp.is(tok::comment)) // Skip comments in -C mode. 
- LexUnexpandedToken(Tmp); + ReadNextTok(&Preprocessor::LexUnexpandedToken); if (Tmp.is(tok::eod)) return Tmp.getLocation(); @@ -466,8 +477,15 @@ SourceLocation Preprocessor::CheckEndOfDirective(const char *DirType, if ((LangOpts.GNUMode || LangOpts.C99 || LangOpts.CPlusPlus) && !CurTokenLexer) Hint = FixItHint::CreateInsertion(Tmp.getLocation(),"//"); - Diag(Tmp, diag::ext_pp_extra_tokens_at_eol) << DirType << Hint; - return DiscardUntilEndOfDirective().getEnd(); + + unsigned DiagID = diag::ext_pp_extra_tokens_at_eol; + // C++20 import or module directive has no '#' prefix. + if (getLangOpts().CPlusPlusModules && + (DirType == "import" || DirType == "module")) + DiagID = diag::ext_pp_extra_tokens_at_module_directive_eol; + + Diag(Tmp, DiagID) << DirType << Hint; + return DiscardUntilEndOfDirective(ExtraToks).getEnd(); } void Preprocessor::SuggestTypoedDirective(const Token &Tok, @@ -1242,12 +1260,14 @@ void Preprocessor::HandleDirective(Token &Result) { // pp-directive. bool ReadAnyTokensBeforeDirective =CurPPLexer->MIOpt.getHasReadAnyTokensVal(); - // Save the '#' token in case we need to return it later. - Token SavedHash = Result; + // Save the directive-introducing token('#' and import/module in C++20) in + // case we need to return it later. + Token Introducer = Result; // Read the next token, the directive flavor. This isn't expanded due to // C99 6.10.3p8. - LexUnexpandedToken(Result); + if (Introducer.is(tok::hash)) + LexUnexpandedToken(Result); // C99 6.10.3p11: Is this preprocessor directive in macro invocation? 
e.g.: // #define A(x) #x @@ -1266,7 +1286,13 @@ void Preprocessor::HandleDirective(Token &Result) { case tok::pp___include_macros: case tok::pp_pragma: case tok::pp_embed: - Diag(Result, diag::err_embedded_directive) << II->getName(); + case tok::pp_module: + case tok::pp___preprocessed_module: + case tok::pp___preprocessed_import: + Diag(Result, diag::err_embedded_directive) + << Introducer.isModuleContextualKeyword(getLangOpts(), + /*AllowExport=*/false) + << II->getName(); Diag(*ArgMacro, diag::note_macro_expansion_here) << ArgMacro->getIdentifierInfo(); DiscardUntilEndOfDirective(); @@ -1283,7 +1309,8 @@ void Preprocessor::HandleDirective(Token &Result) { ResetMacroExpansionHelper helper(this); if (SkippingUntilPCHThroughHeader || SkippingUntilPragmaHdrStop) - return HandleSkippedDirectiveWhileUsingPCH(Result, SavedHash.getLocation()); + return HandleSkippedDirectiveWhileUsingPCH(Result, + Introducer.getLocation()); switch (Result.getKind()) { case tok::eod: @@ -1303,7 +1330,7 @@ void Preprocessor::HandleDirective(Token &Result) { // directive. However do permit it in the predefines file, as we use line // markers to mark the builtin macros as being in a system header. if (getLangOpts().AsmPreprocessor && - SourceMgr.getFileID(SavedHash.getLocation()) != getPredefinesFileID()) + SourceMgr.getFileID(Introducer.getLocation()) != getPredefinesFileID()) break; return HandleDigitDirective(Result); default: @@ -1315,30 +1342,32 @@ void Preprocessor::HandleDirective(Token &Result) { default: break; // C99 6.10.1 - Conditional Inclusion. 
case tok::pp_if: - return HandleIfDirective(Result, SavedHash, ReadAnyTokensBeforeDirective); + return HandleIfDirective(Result, Introducer, + ReadAnyTokensBeforeDirective); case tok::pp_ifdef: - return HandleIfdefDirective(Result, SavedHash, false, + return HandleIfdefDirective(Result, Introducer, false, true /*not valid for miopt*/); case tok::pp_ifndef: - return HandleIfdefDirective(Result, SavedHash, true, + return HandleIfdefDirective(Result, Introducer, true, ReadAnyTokensBeforeDirective); case tok::pp_elif: case tok::pp_elifdef: case tok::pp_elifndef: - return HandleElifFamilyDirective(Result, SavedHash, II->getPPKeywordID()); + return HandleElifFamilyDirective(Result, Introducer, + II->getPPKeywordID()); case tok::pp_else: - return HandleElseDirective(Result, SavedHash); + return HandleElseDirective(Result, Introducer); case tok::pp_endif: return HandleEndifDirective(Result); // C99 6.10.2 - Source File Inclusion. case tok::pp_include: // Handle #include. - return HandleIncludeDirective(SavedHash.getLocation(), Result); + return HandleIncludeDirective(Introducer.getLocation(), Result); case tok::pp___include_macros: // Handle -imacros. - return HandleIncludeMacrosDirective(SavedHash.getLocation(), Result); + return HandleIncludeMacrosDirective(Introducer.getLocation(), Result); // C99 6.10.3 - Macro Replacement. case tok::pp_define: @@ -1356,13 +1385,20 @@ void Preprocessor::HandleDirective(Token &Result) { // C99 6.10.6 - Pragma Directive. case tok::pp_pragma: - return HandlePragmaDirective({PIK_HashPragma, SavedHash.getLocation()}); - + return HandlePragmaDirective({PIK_HashPragma, Introducer.getLocation()}); + case tok::pp_module: + case tok::pp___preprocessed_module: + return HandleCXXModuleDirective(Result); + case tok::pp___preprocessed_import: + return HandleCXXImportDirective(Result); // GNU Extensions. 
case tok::pp_import: - return HandleImportDirective(SavedHash.getLocation(), Result); + if (Introducer.isModuleContextualKeyword(getLangOpts(), + /*AllowExport=*/false)) + return HandleCXXImportDirective(Result); + return HandleImportDirective(Introducer.getLocation(), Result); case tok::pp_include_next: - return HandleIncludeNextDirective(SavedHash.getLocation(), Result); + return HandleIncludeNextDirective(Introducer.getLocation(), Result); case tok::pp_warning: if (LangOpts.CPlusPlus) @@ -1381,7 +1417,7 @@ void Preprocessor::HandleDirective(Token &Result) { case tok::pp_sccs: return HandleIdentSCCSDirective(Result); case tok::pp_embed: - return HandleEmbedDirective(SavedHash.getLocation(), Result, + return HandleEmbedDirective(Introducer.getLocation(), Result, getCurrentFileLexer() ? *getCurrentFileLexer()->getFileEntry() : static_cast(nullptr)); @@ -1412,7 +1448,7 @@ void Preprocessor::HandleDirective(Token &Result) { if (getLangOpts().AsmPreprocessor) { auto Toks = std::make_unique(2); // Return the # and the token after it. 
- Toks[0] = SavedHash; + Toks[0] = Introducer; Toks[1] = Result; // If the second token is a hashhash token, then we need to translate it to @@ -4054,3 +4090,289 @@ void Preprocessor::HandleEmbedDirective(SourceLocation HashLoc, Token &EmbedTok, StringRef(static_cast(Mem), OriginalFilename.size()); HandleEmbedDirectiveImpl(HashLoc, *Params, BinaryContents, FilenameToGo); } + +void Preprocessor::HandleCXXImportDirective(Token ImportTok) { + assert(getLangOpts().CPlusPlusModules && ImportTok.is(tok::kw_import)); + llvm::SaveAndRestore SaveImportingCXXModules( + this->ImportingCXXNamedModules, true); + + if (LastTokenWasExportKeyword.isValid()) + LastTokenWasExportKeyword.reset(); + + Token Tok; + if (LexHeaderName(Tok)) { + if (Tok.isNot(tok::eod)) + CheckEndOfDirective(ImportTok.getIdentifierInfo()->getName()); + return; + } + + SourceLocation UseLoc = ImportTok.getLocation(); + SmallVector DirToks{ImportTok}; + SmallVector Path; + bool ImportingHeader = false; + bool IsPartition = false; + std::string FlatName; + switch (Tok.getKind()) { + case tok::header_name: + ImportingHeader = true; + DirToks.push_back(Tok); + break; + case tok::colon: + IsPartition = true; + DirToks.push_back(Tok); + UseLoc = Tok.getLocation(); + Lex(Tok); + [[fallthrough]]; + case tok::identifier: { + bool LeadingSpace = Tok.hasLeadingSpace(); + unsigned NumToksInDirective = DirToks.size(); + if (LexModuleNameContinue(Tok, UseLoc, DirToks, Path)) { + if (Tok.isNot(tok::eod)) + CheckEndOfDirective(ImportTok.getIdentifierInfo()->getName(), + /*EnableMacros=*/false, &DirToks); + EnterModuleSuffixTokenStream(DirToks); + return; + } + + // Clean the module-name tokens and replace these tokens with + // annot_module_name. 
+ DirToks.resize(NumToksInDirective); + ModuleNameLoc *NameLoc = ModuleNameLoc::Create(*this, Path); + DirToks.emplace_back(); + DirToks.back().setKind(tok::annot_module_name); + DirToks.back().setAnnotationRange(NameLoc->getRange()); + DirToks.back().setAnnotationValue(static_cast(NameLoc)); + DirToks.back().setFlagValue(Token::LeadingSpace, LeadingSpace); + DirToks.push_back(Tok); + + bool IsValid = + (IsPartition && ModuleDeclState.isNamedModule()) || !IsPartition; + if (Callbacks && IsValid) { + if (IsPartition && ModuleDeclState.isNamedModule()) { + FlatName += ModuleDeclState.getPrimaryName(); + FlatName += ":"; + } + + FlatName += ModuleLoader::getFlatNameFromPath(Path); + SourceLocation StartLoc = IsPartition ? UseLoc : Path[0].getLoc(); + IdentifierLoc FlatNameLoc(StartLoc, getIdentifierInfo(FlatName)); + + // We don't/shouldn't load the standard c++20 modules when preprocessing. + // so the imported module is nullptr. + Callbacks->moduleImport(ImportTok.getLocation(), + ModuleIdPath(FlatNameLoc), + /*Imported=*/nullptr); + } + break; + } + default: + DirToks.push_back(Tok); + break; + } + + // Consume the pp-import-suffix and expand any macros in it now, if we're not + // at the semicolon already. + if (!DirToks.back().isOneOf(tok::semi, tok::eod)) + CollectPPImportSuffix(DirToks); + + if (DirToks.back().isNot(tok::eod)) + CheckEndOfDirective(ImportTok.getIdentifierInfo()->getName()); + else + DirToks.pop_back(); + + // This is not a pp-import after all. + if (DirToks.back().isNot(tok::semi)) { + EnterModuleSuffixTokenStream(DirToks); + return; + } + + if (ImportingHeader) { + // C++2a [cpp.module]p1: + // The ';' preprocessing-token terminating a pp-import shall not have + // been produced by macro replacement. 
+ SourceLocation SemiLoc = DirToks.back().getLocation(); + if (SemiLoc.isMacroID()) + Diag(SemiLoc, diag::err_header_import_semi_in_macro); + + auto Action = HandleHeaderIncludeOrImport( + /*HashLoc*/ SourceLocation(), ImportTok, Tok, SemiLoc); + switch (Action.Kind) { + case ImportAction::None: + break; + + case ImportAction::ModuleBegin: + // Let the parser know we're textually entering the module. + DirToks.emplace_back(); + DirToks.back().startToken(); + DirToks.back().setKind(tok::annot_module_begin); + DirToks.back().setLocation(SemiLoc); + DirToks.back().setAnnotationEndLoc(SemiLoc); + DirToks.back().setAnnotationValue(Action.ModuleForHeader); + [[fallthrough]]; + + case ImportAction::ModuleImport: + case ImportAction::HeaderUnitImport: + case ImportAction::SkippedModuleImport: + // We chose to import (or textually enter) the file. Convert the + // header-name token into a header unit annotation token. + DirToks[1].setKind(tok::annot_header_unit); + DirToks[1].setAnnotationEndLoc(DirToks[0].getLocation()); + DirToks[1].setAnnotationValue(Action.ModuleForHeader); + // FIXME: Call the moduleImport callback? 
+ break; + case ImportAction::Failure: + assert(TheModuleLoader.HadFatalFailure && + "This should be an early exit only to a fatal error"); + CurLexer->cutOffLexing(); + return; + } + } + + EnterModuleSuffixTokenStream(DirToks); +} + +void Preprocessor::HandleCXXModuleDirective(Token ModuleTok) { + assert(getLangOpts().CPlusPlusModules && ModuleTok.is(tok::kw_module)); + Token Introducer = ModuleTok; + if (LastTokenWasExportKeyword.isValid()) { + Introducer = LastTokenWasExportKeyword.getExportTok(); + LastTokenWasExportKeyword.reset(); + } + + SourceLocation StartLoc = Introducer.getLocation(); + if (!IncludeMacroStack.empty()) { + SourceLocation End = DiscardUntilEndOfDirective().getEnd(); + Diag(StartLoc, diag::err_pp_module_decl_in_header) + << SourceRange(StartLoc, End); + return; + } + + if (CurPPLexer->getConditionalStackDepth() != 0) { + SourceLocation End = DiscardUntilEndOfDirective().getEnd(); + Diag(StartLoc, diag::err_pp_cond_span_module_decl) + << SourceRange(StartLoc, End); + return; + } + + Token Tok; + SourceLocation UseLoc = ModuleTok.getLocation(); + SmallVector DirToks{ModuleTok}; + SmallVector Path, Partition; + LexUnexpandedToken(Tok); + + switch (Tok.getKind()) { + // Global Module Fragment. + case tok::semi: + DirToks.push_back(Tok); + break; + case tok::colon: + DirToks.push_back(Tok); + LexUnexpandedToken(Tok); + if (Tok.isNot(tok::kw_private)) { + if (Tok.isNot(tok::eod)) + CheckEndOfDirective(ModuleTok.getIdentifierInfo()->getName(), + /*EnableMacros=*/false, &DirToks); + EnterModuleSuffixTokenStream(DirToks); + return; + } + DirToks.push_back(Tok); + break; + case tok::identifier: { + bool LeadingSpace = Tok.hasLeadingSpace(); + unsigned NumToksInDirective = DirToks.size(); + + // C++ [cpp.module]p3: Any preprocessing tokens after the module + // preprocessing token in the module directive are processed just as in + // normal text. + // + // P3034R1 Module Declarations Shouldn’t be Macros. 
+ if (LexModuleNameContinue(Tok, UseLoc, DirToks, Path, + /*AllowMacroExpansion=*/false)) { + if (Tok.isNot(tok::eod)) + CheckEndOfDirective(ModuleTok.getIdentifierInfo()->getName(), + /*EnableMacros=*/false, &DirToks); + EnterModuleSuffixTokenStream(DirToks); + return; + } + + ModuleNameLoc *NameLoc = ModuleNameLoc::Create(*this, Path); + DirToks.resize(NumToksInDirective); + DirToks.emplace_back(); + DirToks.back().setKind(tok::annot_module_name); + DirToks.back().setAnnotationRange(NameLoc->getRange()); + DirToks.back().setAnnotationValue(static_cast(NameLoc)); + DirToks.back().setFlagValue(Token::LeadingSpace, LeadingSpace); + DirToks.push_back(Tok); + + // C++20 [cpp.module]p + // The pp-tokens, if any, of a pp-module shall be of the form: + // pp-module-name pp-module-partition[opt] pp-tokens[opt] + if (Tok.is(tok::colon)) { + NumToksInDirective = DirToks.size(); + LexUnexpandedToken(Tok); + LeadingSpace = Tok.hasLeadingSpace(); + if (LexModuleNameContinue(Tok, UseLoc, DirToks, Partition, + /*AllowMacroExpansion=*/false, + /*IsPartition=*/true)) { + if (Tok.isNot(tok::eod)) + CheckEndOfDirective(ModuleTok.getIdentifierInfo()->getName(), + /*EnableMacros=*/false, &DirToks); + EnterModuleSuffixTokenStream(DirToks); + return; + } + + ModuleNameLoc *PartitionLoc = ModuleNameLoc::Create(*this, Partition); + DirToks.resize(NumToksInDirective); + DirToks.emplace_back(); + DirToks.back().setKind(tok::annot_module_name); + DirToks.back().setAnnotationRange(PartitionLoc->getRange()); + DirToks.back().setAnnotationValue(static_cast(PartitionLoc)); + DirToks.back().setFlagValue(Token::LeadingSpace, LeadingSpace); + DirToks.push_back(Tok); + } + + // If the current token is a macro definition, put it back to token stream + // and expand any macros in it later. + // + // export module M ATTR(some_attr); // -D'ATTR(x)=[[x]]' + // + // Current token is `ATTR`. 
+ if (Tok.is(tok::identifier) && + getMacroDefinition(Tok.getIdentifierInfo())) { + std::unique_ptr TokCopy = std::make_unique(1); + TokCopy[0] = Tok; + EnterTokenStream(std::move(TokCopy), /*NumToks=*/1, + /*DisableMacroExpansion=*/false, /*IsReinject=*/false); + Lex(Tok); + DirToks.back() = Tok; + } + break; + } + default: + DirToks.push_back(Tok); + break; + } + + // Consume the pp-import-suffix and expand any macros in it now, if we're not + // at the semicolon already. + SourceLocation End = DirToks.back().getLocation(); + if (!DirToks.back().isOneOf(tok::semi, tok::eod)) { + // Consume the pp-import-suffix and expand any macros in it now. We'll add + // it back into the token stream later. + CollectPPImportSuffix(DirToks); + End = DirToks.back().getLocation(); + } + + if (DirToks.back().isNot(tok::eod)) + End = CheckEndOfDirective(ModuleTok.getIdentifierInfo()->getName(), + /*EnableMacros=*/false, &DirToks); + else + End = DirToks.pop_back_val().getLocation(); + + // export module m + // ; + // + // FIXME: Do we need to strictly check whether ';' and module directive are in + // the same line? 
+ EnterModuleSuffixTokenStream(DirToks); +} diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp index e003ad3a95570..59a474b50b857 100644 --- a/clang/lib/Lex/Preprocessor.cpp +++ b/clang/lib/Lex/Preprocessor.cpp @@ -35,6 +35,7 @@ #include "clang/Basic/SourceManager.h" #include "clang/Basic/TargetInfo.h" #include "clang/Lex/CodeCompletionHandler.h" +#include "clang/Lex/DependencyDirectivesScanner.h" #include "clang/Lex/ExternalPreprocessorSource.h" #include "clang/Lex/HeaderSearch.h" #include "clang/Lex/LexDiagnostic.h" @@ -55,11 +56,14 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/Capacity.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/MemoryBufferRef.h" +#include "llvm/Support/SaveAndRestore.h" #include "llvm/Support/raw_ostream.h" #include #include @@ -576,6 +580,11 @@ void Preprocessor::EnterMainSourceFile() { // export module M; // error: module declaration must occur // // at the start of the translation unit. if (getLangOpts().CPlusPlusModules) { + std::optional Input = + getSourceManager().getBufferDataOrNone(MainFileID); + if (Input) + MainFileIsPreprocessedModuleFile = + clang::isPreprocessedModuleFile(*Input); auto Tracer = std::make_unique(*this); DirTracer = Tracer.get(); addPPCallbacks(std::move(Tracer)); @@ -877,13 +886,14 @@ bool Preprocessor::HandleIdentifier(Token &Identifier) { // Likewise if this is the standard C++ import keyword. if (((LastTokenWasAt && II.isModulesImport()) || Identifier.is(tok::kw_import)) && - !InMacroArgs && !DisableMacroExpansion && - (getLangOpts().Modules || getLangOpts().DebuggerSupport) && + // FIXME: Can we just ignore DisableMacroExpansion here? 
+ // https://github.com/llvm/llvm-project/pull/137665 disable + // macro expansion when current input file is preprocessed. + !InMacroArgs && + (!DisableMacroExpansion || MacroExpansionInDirectivesOverride) && CurLexerCallback != CLK_CachingLexer) { ModuleImportLoc = Identifier.getLocation(); - NamedModuleImportPath.clear(); IsAtImport = true; - ModuleImportExpectsIdentifier = true; CurLexerCallback = CLK_LexAfterModuleImport; } return true; @@ -932,6 +942,7 @@ void Preprocessor::Lex(Token &Result) { // This token is injected to represent the translation of '#include "a.h"' // into "import a.h;". Mimic the notional ';'. case tok::annot_module_include: + case tok::annot_repl_input_end: case tok::semi: TrackGMFState.handleSemi(); StdCXXImportSeqState.handleSemi(); @@ -951,35 +962,23 @@ void Preprocessor::Lex(Token &Result) { case tok::colon: ModuleDeclState.handleColon(); break; - case tok::period: - ModuleDeclState.handlePeriod(); - break; - case tok::eod: + case tok::kw_import: + if (StdCXXImportSeqState.atTopLevel()) { + TrackGMFState.handleImport(StdCXXImportSeqState.afterTopLevelSeq()); + StdCXXImportSeqState.handleImport(); + } break; - case tok::identifier: - // Check "import" and "module" when there is no open bracket. The two - // identifiers are not meaningful with open brackets. 
+ case tok::kw_module: if (StdCXXImportSeqState.atTopLevel()) { - if (Result.getIdentifierInfo()->isModulesImport()) { - TrackGMFState.handleImport(StdCXXImportSeqState.afterTopLevelSeq()); - StdCXXImportSeqState.handleImport(); - if (StdCXXImportSeqState.afterImportSeq()) { - ModuleImportLoc = Result.getLocation(); - NamedModuleImportPath.clear(); - IsAtImport = false; - ModuleImportExpectsIdentifier = true; - CurLexerCallback = CLK_LexAfterModuleImport; - } - break; - } else if (Result.getIdentifierInfo() == getIdentifierInfo("module")) { - if (hasSeenNoTrivialPPDirective()) - Result.setFlag(Token::HasSeenNoTrivialPPDirective); - TrackGMFState.handleModule(StdCXXImportSeqState.afterTopLevelSeq()); - ModuleDeclState.handleModule(); - break; - } + if (hasSeenNoTrivialPPDirective()) + Result.setFlag(Token::HasSeenNoTrivialPPDirective); + TrackGMFState.handleModule(StdCXXImportSeqState.afterTopLevelSeq()); + ModuleDeclState.handleModule(); } - ModuleDeclState.handleIdentifier(Result.getIdentifierInfo()); + break; + case tok::annot_module_name: + ModuleDeclState.handleModuleName( + static_cast(Result.getAnnotationValue())); if (ModuleDeclState.isModuleCandidate()) break; [[fallthrough]]; @@ -997,6 +996,9 @@ void Preprocessor::Lex(Token &Result) { } LastTokenWasAt = Result.is(tok::at); + if (Result.isNot(tok::kw_export)) + LastTokenWasExportKeyword.reset(); + --LexLevel; if ((LexLevel == 0 || PreprocessToken) && @@ -1119,41 +1121,202 @@ bool Preprocessor::LexHeaderName(Token &FilenameTok, bool AllowMacroExpansion) { return false; } +// We represent the primary and partition names as 'Paths' which are sections +// of the hierarchical access path for a clang module. However for C++20 +// the periods in a name are just another character, and we will need to +// flatten them into a string. 
+std::string ModuleLoader::getFlatNameFromPath(ModuleIdPath Path) { + std::string Name; + if (Path.empty()) + return Name; + + for (auto &Piece : Path) { + assert(Piece.getIdentifierInfo() && Piece.getLoc().isValid()); + if (!Name.empty()) + Name += "."; + Name += Piece.getIdentifierInfo()->getName(); + } + return Name; +} + +ModuleNameLoc *ModuleNameLoc::Create(Preprocessor &PP, ModuleIdPath Path) { + assert(!Path.empty() && "expect at least one identifier in a module name"); + void *Mem = PP.getPreprocessorAllocator().Allocate( + totalSizeToAlloc(Path.size()), alignof(ModuleNameLoc)); + return new (Mem) ModuleNameLoc(Path); +} + +bool Preprocessor::LexModuleNameContinue(Token &Tok, SourceLocation UseLoc, + SmallVectorImpl &Suffix, + SmallVectorImpl &Path, + bool AllowMacroExpansion, + bool IsPartition) { + auto ConsumeToken = [&]() { + if (AllowMacroExpansion) + Lex(Tok); + else + LexUnexpandedToken(Tok); + Suffix.push_back(Tok); + }; + + while (true) { + if (Tok.isNot(tok::identifier)) { + if (Tok.is(tok::code_completion)) { + CurLexer->cutOffLexing(); + CodeComplete->CodeCompleteModuleImport(UseLoc, Path); + return true; + } + + Diag(Tok, diag::err_pp_module_expected_ident) << Path.empty(); + return true; + } + + // [cpp.pre]/p2: + // No identifier in the pp-module-name or pp-module-partition shall + // currently be defined as an object-like macro. + if (MacroInfo *MI = getMacroInfo(Tok.getIdentifierInfo()); + MI && MI->isObjectLike() && getLangOpts().CPlusPlus20 && + !AllowMacroExpansion) { + Diag(Tok, diag::err_pp_module_name_is_macro) + << IsPartition << Tok.getIdentifierInfo(); + Diag(MI->getDefinitionLoc(), diag::note_macro_here) + << Tok.getIdentifierInfo(); + } + + // Record this part of the module path. 
+ Path.emplace_back(Tok.getLocation(), Tok.getIdentifierInfo()); + ConsumeToken(); + + if (Tok.isNot(tok::period)) + return false; + + ConsumeToken(); + } +} + +/// P1857R3: Modules Dependency Discovery +/// +/// At the start of phase 4 an import or module token is treated as starting a +/// directive and is converted to its respective keyword iff: +/// - After skipping horizontal whitespace, it is +/// - at the start of a logical line, or +/// - preceded by an 'export' at the start of the logical line. +/// - It is followed by an identifier pp token (before macro expansion), or +/// - <, ", or : (but not ::) pp tokens for 'import', or +/// - ; for 'module' +/// Otherwise the token is treated as an identifier. +bool Preprocessor::HandleModuleContextualKeyword( + Token &Result, bool TokAtPhysicalStartOfLine) { + if (!Result.isModuleContextualKeyword(getLangOpts())) + return false; + + if (Result.is(tok::kw_export)) { + LastTokenWasExportKeyword = {Result, TokAtPhysicalStartOfLine}; + return false; + } + + /// Treat 'module' and 'import' as an identifier when the main file is a + /// preprocessed module file. We only allow '__preprocessed_module' and + /// '__preprocessed_import' in this context. + IdentifierInfo *II = Result.getIdentifierInfo(); + if (isPreprocessedModuleFile() && + (II->isStr(tok::getKeywordSpelling(tok::kw_import)) || + II->isStr(tok::getKeywordSpelling(tok::kw_module)))) + return false; + + if (LastTokenWasExportKeyword.isValid()) { + // The export keyword was not at the start of line, it's not a + // directive-introducing token. 
+ if (!LastTokenWasExportKeyword.isAtPhysicalStartOfLine()) + return false; + // [cpp.pre]/1.4 + // export // not a preprocessing directive + // import foo; // preprocessing directive (ill-formed at phase7) + if (TokAtPhysicalStartOfLine) + return false; + } else if (!TokAtPhysicalStartOfLine) + return false; + + llvm::SaveAndRestore SavedParsingPreprocessorDirective( + CurPPLexer->ParsingPreprocessorDirective, true); + + // The next token may be an angled string literal after import keyword. + llvm::SaveAndRestore SavedParsingFilemame( + CurPPLexer->ParsingFilename, + Result.getIdentifierInfo()->isModulesImport()); + + std::optional NextTok = + CurLexer ? CurLexer->peekNextPPToken() : CurTokenLexer->peekNextPPToken(); + if (!NextTok) + return false; + + if (NextTok->is(tok::raw_identifier)) + LookUpIdentifierInfo(*NextTok); + + if (Result.getIdentifierInfo()->isModulesImport()) { + if (NextTok->isOneOf(tok::identifier, tok::less, tok::colon, + tok::header_name)) { + Result.setKind(tok::kw_import); + ModuleImportLoc = Result.getLocation(); + IsAtImport = false; + return true; + } + } + + if (Result.getIdentifierInfo()->isModulesDeclaration() && + NextTok->isOneOf(tok::identifier, tok::colon, tok::semi)) { + Result.setKind(tok::kw_module); + ModuleDeclLoc = Result.getLocation(); + return true; + } + + // Ok, it's an identifier. + return false; +} + +bool Preprocessor::CollectPPImportSuffixAndEnterStream( + SmallVectorImpl &Toks, bool StopUntilEOD) { + CollectPPImportSuffix(Toks); + EnterModuleSuffixTokenStream(Toks); + return false; +} + /// Collect the tokens of a C++20 pp-import-suffix. -void Preprocessor::CollectPpImportSuffix(SmallVectorImpl &Toks) { +void Preprocessor::CollectPPImportSuffix(SmallVectorImpl &Toks, + bool StopUntilEOD) { // FIXME: For error recovery, consider recognizing attribute syntax here // and terminating / diagnosing a missing semicolon if we find anything // else? (Can we leave that to the parser?) 
- unsigned BracketDepth = 0; while (true) { Toks.emplace_back(); Lex(Toks.back()); switch (Toks.back().getKind()) { - case tok::l_paren: case tok::l_square: case tok::l_brace: - ++BracketDepth; - break; - - case tok::r_paren: case tok::r_square: case tok::r_brace: - if (BracketDepth == 0) - return; - --BracketDepth; - break; - case tok::semi: - if (BracketDepth == 0) + if (!StopUntilEOD) return; - break; - + [[fallthrough]]; + case tok::eod: case tok::eof: return; - default: break; } } } +// Allocate a holding buffer for a sequence of tokens and introduce it into +// the token stream. +void Preprocessor::EnterModuleSuffixTokenStream(ArrayRef Toks) { + if (Toks.empty()) + return; + auto ToksCopy = std::make_unique(Toks.size()); + std::copy(Toks.begin(), Toks.end(), ToksCopy.get()); + EnterTokenStream(std::move(ToksCopy), Toks.size(), + /*DisableMacroExpansion*/ false, /*IsReinject*/ false); + assert(CurTokenLexer && "Must have a TokenLexer"); + CurTokenLexer->setLexingCXXModuleDirective(); +} /// Lex a token following the 'import' contextual keyword. /// @@ -1178,186 +1341,47 @@ bool Preprocessor::LexAfterModuleImport(Token &Result) { // Figure out what kind of lexer we actually have. recomputeCurLexerKind(); - // Lex the next token. The header-name lexing rules are used at the start of - // a pp-import. - // - // For now, we only support header-name imports in C++20 mode. - // FIXME: Should we allow this in all language modes that support an import - // declaration as an extension? 
- if (NamedModuleImportPath.empty() && getLangOpts().CPlusPlusModules) { - if (LexHeaderName(Result)) - return true; - - if (Result.is(tok::colon) && ModuleDeclState.isNamedModule()) { - std::string Name = ModuleDeclState.getPrimaryName().str(); - Name += ":"; - NamedModuleImportPath.emplace_back(Result.getLocation(), - getIdentifierInfo(Name)); - CurLexerCallback = CLK_LexAfterModuleImport; - return true; - } - } else { - Lex(Result); - } - - // Allocate a holding buffer for a sequence of tokens and introduce it into - // the token stream. - auto EnterTokens = [this](ArrayRef Toks) { - auto ToksCopy = std::make_unique(Toks.size()); - std::copy(Toks.begin(), Toks.end(), ToksCopy.get()); - EnterTokenStream(std::move(ToksCopy), Toks.size(), - /*DisableMacroExpansion*/ true, /*IsReinject*/ false); - }; - - bool ImportingHeader = Result.is(tok::header_name); - // Check for a header-name. SmallVector Suffix; - if (ImportingHeader) { - // Enter the header-name token into the token stream; a Lex action cannot - // both return a token and cache tokens (doing so would corrupt the token - // cache if the call to Lex comes from CachingLex / PeekAhead). - Suffix.push_back(Result); - - // Consume the pp-import-suffix and expand any macros in it now. We'll add - // it back into the token stream later. - CollectPpImportSuffix(Suffix); - if (Suffix.back().isNot(tok::semi)) { - // This is not a pp-import after all. - EnterTokens(Suffix); - return false; - } - - // C++2a [cpp.module]p1: - // The ';' preprocessing-token terminating a pp-import shall not have - // been produced by macro replacement. - SourceLocation SemiLoc = Suffix.back().getLocation(); - if (SemiLoc.isMacroID()) - Diag(SemiLoc, diag::err_header_import_semi_in_macro); - - // Reconstitute the import token. 
- Token ImportTok; - ImportTok.startToken(); - ImportTok.setKind(tok::kw_import); - ImportTok.setLocation(ModuleImportLoc); - ImportTok.setIdentifierInfo(getIdentifierInfo("import")); - ImportTok.setLength(6); - - auto Action = HandleHeaderIncludeOrImport( - /*HashLoc*/ SourceLocation(), ImportTok, Suffix.front(), SemiLoc); - switch (Action.Kind) { - case ImportAction::None: - break; - - case ImportAction::ModuleBegin: - // Let the parser know we're textually entering the module. - Suffix.emplace_back(); - Suffix.back().startToken(); - Suffix.back().setKind(tok::annot_module_begin); - Suffix.back().setLocation(SemiLoc); - Suffix.back().setAnnotationEndLoc(SemiLoc); - Suffix.back().setAnnotationValue(Action.ModuleForHeader); - [[fallthrough]]; - - case ImportAction::ModuleImport: - case ImportAction::HeaderUnitImport: - case ImportAction::SkippedModuleImport: - // We chose to import (or textually enter) the file. Convert the - // header-name token into a header unit annotation token. - Suffix[0].setKind(tok::annot_header_unit); - Suffix[0].setAnnotationEndLoc(Suffix[0].getLocation()); - Suffix[0].setAnnotationValue(Action.ModuleForHeader); - // FIXME: Call the moduleImport callback? - break; - case ImportAction::Failure: - assert(TheModuleLoader.HadFatalFailure && - "This should be an early exit only to a fatal error"); - Result.setKind(tok::eof); - CurLexer->cutOffLexing(); - EnterTokens(Suffix); - return true; - } - - EnterTokens(Suffix); - return false; - } - - // The token sequence - // - // import identifier (. identifier)* - // - // indicates a module import directive. We already saw the 'import' - // contextual keyword, so now we're looking for the identifiers. - if (ModuleImportExpectsIdentifier && Result.getKind() == tok::identifier) { - // We expected to see an identifier here, and we did; continue handling - // identifiers. 
- NamedModuleImportPath.emplace_back(Result.getLocation(), - Result.getIdentifierInfo()); - ModuleImportExpectsIdentifier = false; - CurLexerCallback = CLK_LexAfterModuleImport; - return true; - } - - // If we're expecting a '.' or a ';', and we got a '.', then wait until we - // see the next identifier. (We can also see a '[[' that begins an - // attribute-specifier-seq here under the Standard C++ Modules.) - if (!ModuleImportExpectsIdentifier && Result.getKind() == tok::period) { - ModuleImportExpectsIdentifier = true; - CurLexerCallback = CLK_LexAfterModuleImport; - return true; - } - - // If we didn't recognize a module name at all, this is not a (valid) import. - if (NamedModuleImportPath.empty() || Result.is(tok::eof)) - return true; + SmallVector Path; + Lex(Result); + if (LexModuleNameContinue(Result, ModuleImportLoc, Suffix, Path)) + return CollectPPImportSuffixAndEnterStream(Suffix); + + ModuleNameLoc *NameLoc = ModuleNameLoc::Create(*this, Path); + Suffix.clear(); + Suffix.emplace_back(); + Suffix.back().setKind(tok::annot_module_name); + Suffix.back().setAnnotationRange(NameLoc->getRange()); + Suffix.back().setAnnotationValue(static_cast(NameLoc)); + Suffix.push_back(Result); // Consume the pp-import-suffix and expand any macros in it now, if we're not // at the semicolon already. SourceLocation SemiLoc = Result.getLocation(); - if (Result.isNot(tok::semi)) { - Suffix.push_back(Result); - CollectPpImportSuffix(Suffix); + if (Suffix.back().isNot(tok::semi)) { + if (Suffix.back().isNot(tok::eof)) + CollectPPImportSuffix(Suffix); if (Suffix.back().isNot(tok::semi)) { // This is not an import after all. - EnterTokens(Suffix); + EnterModuleSuffixTokenStream(Suffix); return false; } SemiLoc = Suffix.back().getLocation(); } - // Under the standard C++ Modules, the dot is just part of the module name, - // and not a real hierarchy separator. Flatten such module names now. - // - // FIXME: Is this the right level to be performing this transformation? 
- std::string FlatModuleName; - if (getLangOpts().CPlusPlusModules) { - for (auto &Piece : NamedModuleImportPath) { - // If the FlatModuleName ends with colon, it implies it is a partition. - if (!FlatModuleName.empty() && FlatModuleName.back() != ':') - FlatModuleName += "."; - FlatModuleName += Piece.getIdentifierInfo()->getName(); - } - SourceLocation FirstPathLoc = NamedModuleImportPath[0].getLoc(); - NamedModuleImportPath.clear(); - NamedModuleImportPath.emplace_back(FirstPathLoc, - getIdentifierInfo(FlatModuleName)); - } - Module *Imported = nullptr; - // We don't/shouldn't load the standard c++20 modules when preprocessing. - if (getLangOpts().Modules && !isInImportingCXXNamedModules()) { - Imported = TheModuleLoader.loadModule(ModuleImportLoc, - NamedModuleImportPath, - Module::Hidden, + if (getLangOpts().Modules) { + Imported = TheModuleLoader.loadModule(ModuleImportLoc, Path, Module::Hidden, /*IsInclusionDirective=*/false); if (Imported) makeModuleVisible(Imported, SemiLoc); } if (Callbacks) - Callbacks->moduleImport(ModuleImportLoc, NamedModuleImportPath, Imported); + Callbacks->moduleImport(ModuleImportLoc, Path, Imported); if (!Suffix.empty()) { - EnterTokens(Suffix); + EnterModuleSuffixTokenStream(Suffix); return false; } return true; diff --git a/clang/lib/Lex/TokenConcatenation.cpp b/clang/lib/Lex/TokenConcatenation.cpp index 05f4203bd722b..f94caee24dc11 100644 --- a/clang/lib/Lex/TokenConcatenation.cpp +++ b/clang/lib/Lex/TokenConcatenation.cpp @@ -161,7 +161,8 @@ bool TokenConcatenation::AvoidConcat(const Token &PrevPrevTok, const Token &PrevTok, const Token &Tok) const { // No space is required between header unit name in quote and semi. 
- if (PrevTok.is(tok::annot_header_unit) && Tok.is(tok::semi)) + if (PrevTok.isOneOf(tok::annot_header_unit, tok::annot_module_name) && + Tok.is(tok::semi)) return false; // Conservatively assume that every annotation token that has a printable @@ -197,11 +198,12 @@ bool TokenConcatenation::AvoidConcat(const Token &PrevPrevTok, if (Tok.isAnnotation()) { // Modules annotation can show up when generated automatically for includes. assert(Tok.isOneOf(tok::annot_module_include, tok::annot_module_begin, - tok::annot_module_end, tok::annot_embed) && + tok::annot_module_end, tok::annot_embed, + tok::annot_module_name) && "unexpected annotation in AvoidConcat"); ConcatInfo = 0; - if (Tok.is(tok::annot_embed)) + if (Tok.isOneOf(tok::annot_embed, tok::annot_module_name)) return true; } diff --git a/clang/lib/Lex/TokenLexer.cpp b/clang/lib/Lex/TokenLexer.cpp index 47f4134fb1465..e9531ee9794d2 100644 --- a/clang/lib/Lex/TokenLexer.cpp +++ b/clang/lib/Lex/TokenLexer.cpp @@ -625,6 +625,18 @@ bool TokenLexer::Lex(Token &Tok) { // that it is no longer being expanded. if (Macro) Macro->EnableMacro(); + // CWG2947: Allow the following code: + // + // export module m; int x; + // extern "C++" int *y = &x; + // + // The 'extern' token should have the 'StartOfLine' flag when the current + // TokenLexer exits and propagates line start/leading space info. + if (isLexingCXXModuleDirective()) { + AtStartOfLine = true; + setLexingCXXModuleDirective(false); + } + Tok.startToken(); Tok.setFlagValue(Token::StartOfLine , AtStartOfLine); Tok.setFlagValue(Token::LeadingSpace, HasLeadingSpace || NextTokGetsSpace); @@ -699,7 +711,9 @@ bool TokenLexer::Lex(Token &Tok) { HasLeadingSpace = false; // Handle recursive expansion! 
- if (!Tok.isAnnotation() && Tok.getIdentifierInfo() != nullptr) { + if (!Tok.isAnnotation() && Tok.getIdentifierInfo() != nullptr && + (!PP.getLangOpts().CPlusPlusModules || + !Tok.isModuleContextualKeyword(PP.getLangOpts()))) { // Change the kind of this identifier to the appropriate token kind, e.g. // turning "for" into a keyword. IdentifierInfo *II = Tok.getIdentifierInfo(); @@ -947,6 +961,18 @@ bool TokenLexer::isParsingPreprocessorDirective() const { return Tokens[NumTokens-1].is(tok::eod) && !isAtEnd(); } +/// setLexingCXXModuleDirective - This is set to true if this TokenLexer is +/// created when handling C++ module directive. +void TokenLexer::setLexingCXXModuleDirective(bool Val) { + LexingCXXModuleDirective = Val; +} + +/// isLexingCXXModuleDirective - Return true if we are lexing a C++ module or +/// import directive. +bool TokenLexer::isLexingCXXModuleDirective() const { + return LexingCXXModuleDirective; +} + /// HandleMicrosoftCommentPaste - In microsoft compatibility mode, /##/ pastes /// together to form a comment that comments out everything in the current /// macro, other active macros, and anything left on the current physical diff --git a/clang/lib/Parse/Parser.cpp b/clang/lib/Parse/Parser.cpp index a17398b84c6a6..a17e66708a431 100644 --- a/clang/lib/Parse/Parser.cpp +++ b/clang/lib/Parse/Parser.cpp @@ -17,6 +17,9 @@ #include "clang/AST/DeclTemplate.h" #include "clang/Basic/DiagnosticParse.h" #include "clang/Basic/StackExhaustionHandler.h" +#include "clang/Basic/TokenKinds.h" +#include "clang/Lex/ModuleLoader.h" +#include "clang/Lex/Preprocessor.h" #include "clang/Parse/RAIIObjectsForParser.h" #include "clang/Sema/DeclSpec.h" #include "clang/Sema/EnterExpressionEvaluationContext.h" @@ -517,8 +520,6 @@ void Parser::Initialize() { Ident_trivially_relocatable_if_eligible = nullptr; Ident_replaceable_if_eligible = nullptr; Ident_GNU_final = nullptr; - Ident_import = nullptr; - Ident_module = nullptr; Ident_super = 
&PP.getIdentifierTable().get("super"); @@ -574,11 +575,6 @@ void Parser::Initialize() { PP.SetPoisonReason(Ident_AbnormalTermination,diag::err_seh___finally_block); } - if (getLangOpts().CPlusPlusModules) { - Ident_import = PP.getIdentifierInfo("import"); - Ident_module = PP.getIdentifierInfo("module"); - } - Actions.Initialize(); // Prime the lexer look-ahead. @@ -626,25 +622,8 @@ bool Parser::ParseTopLevelDecl(DeclGroupPtrTy &Result, switch (NextToken().getKind()) { case tok::kw_module: goto module_decl; - - // Note: no need to handle kw_import here. We only form kw_import under - // the Standard C++ Modules, and in that case 'export import' is parsed as - // an export-declaration containing an import-declaration. - - // Recognize context-sensitive C++20 'export module' and 'export import' - // declarations. - case tok::identifier: { - IdentifierInfo *II = NextToken().getIdentifierInfo(); - if ((II == Ident_module || II == Ident_import) && - GetLookAheadToken(2).isNot(tok::coloncolon)) { - if (II == Ident_module) - goto module_decl; - else - goto import_decl; - } - break; - } - + case tok::kw_import: + goto import_decl; default: break; } @@ -712,22 +691,6 @@ bool Parser::ParseTopLevelDecl(DeclGroupPtrTy &Result, Actions.ActOnEndOfTranslationUnit(); //else don't tell Sema that we ended parsing: more input might come. return true; - - case tok::identifier: - // C++2a [basic.link]p3: - // A token sequence beginning with 'export[opt] module' or - // 'export[opt] import' and not immediately followed by '::' - // is never interpreted as the declaration of a top-level-declaration. 
- if ((Tok.getIdentifierInfo() == Ident_module || - Tok.getIdentifierInfo() == Ident_import) && - NextToken().isNot(tok::coloncolon)) { - if (Tok.getIdentifierInfo() == Ident_module) - goto module_decl; - else - goto import_decl; - } - break; - default: break; } @@ -920,8 +883,10 @@ Parser::ParseExternalDeclaration(ParsedAttributes &Attrs, case tok::kw_import: { Sema::ModuleImportState IS = Sema::ModuleImportState::NotACXX20Module; if (getLangOpts().CPlusPlusModules) { - llvm_unreachable("not expecting a c++20 import here"); - ProhibitAttributes(Attrs); + Diag(Tok, diag::err_unexpected_module_or_import_decl) + << /*IsImport*/ true; + SkipUntil(tok::semi); + return nullptr; } SingleDecl = ParseModuleImport(SourceLocation(), IS); } break; @@ -1013,7 +978,7 @@ Parser::ParseExternalDeclaration(ParsedAttributes &Attrs, return nullptr; case tok::kw_module: - Diag(Tok, diag::err_unexpected_module_decl); + Diag(Tok, diag::err_unexpected_module_or_import_decl) << /*IsImport*/ false; SkipUntil(tok::semi); return nullptr; @@ -2239,6 +2204,11 @@ void Parser::CodeCompleteNaturalLanguage() { Actions.CodeCompletion().CodeCompleteNaturalLanguage(); } +void Parser::CodeCompleteModuleImport(SourceLocation ImportLoc, + ModuleIdPath Path) { + Actions.CodeCompletion().CodeCompleteModuleImport(ImportLoc, Path); +} + bool Parser::ParseMicrosoftIfExistsCondition(IfExistsCondition& Result) { assert((Tok.is(tok::kw___if_exists) || Tok.is(tok::kw___if_not_exists)) && "Expected '__if_exists' or '__if_not_exists'"); @@ -2350,10 +2320,8 @@ Parser::ParseModuleDecl(Sema::ModuleImportState &ImportState) { ? Sema::ModuleDeclKind::Interface : Sema::ModuleDeclKind::Implementation; - assert( - (Tok.is(tok::kw_module) || - (Tok.is(tok::identifier) && Tok.getIdentifierInfo() == Ident_module)) && - "not a module declaration"); + assert(Tok.is(tok::kw_module) && "not a module declaration"); + SourceLocation ModuleLoc = ConsumeToken(); // Attributes appear after the module name, not before. 
@@ -2418,7 +2386,9 @@ Parser::ParseModuleDecl(Sema::ModuleImportState &ImportState) { /*DiagnoseEmptyAttrs=*/false, /*WarnOnUnknownAttrs=*/true); - ExpectAndConsumeSemi(diag::err_module_expected_semi); + if (ExpectAndConsumeSemi(diag::err_expected_semi_after_module_or_import, + tok::getKeywordSpelling(tok::kw_module))) + SkipUntil(tok::semi); return Actions.ActOnModuleDecl(StartLoc, ModuleLoc, MDK, Path, Partition, ImportState, @@ -2432,7 +2402,7 @@ Decl *Parser::ParseModuleImport(SourceLocation AtLoc, SourceLocation ExportLoc; TryConsumeToken(tok::kw_export, ExportLoc); - assert((AtLoc.isInvalid() ? Tok.isOneOf(tok::kw_import, tok::identifier) + assert((AtLoc.isInvalid() ? Tok.is(tok::kw_import) : Tok.isObjCAtKeyword(tok::objc_import)) && "Improper start to module import"); bool IsObjCAtImport = Tok.isObjCAtKeyword(tok::objc_import); @@ -2457,12 +2427,12 @@ Decl *Parser::ParseModuleImport(SourceLocation AtLoc, Diag(ColonLoc, diag::err_unsupported_module_partition) << SourceRange(ColonLoc, Path.back().getLoc()); // Recover by leaving partition empty. 
- else if (ParseModuleName(ColonLoc, Path, /*IsImport*/ true)) + else if (ParseModuleName(ColonLoc, Path, /*IsImport=*/true)) return nullptr; else IsPartition = true; } else { - if (ParseModuleName(ImportLoc, Path, /*IsImport*/ true)) + if (ParseModuleName(ImportLoc, Path, /*IsImport=*/true)) return nullptr; } @@ -2522,8 +2492,17 @@ Decl *Parser::ParseModuleImport(SourceLocation AtLoc, SeenError = false; break; } - ExpectAndConsumeSemi(diag::err_module_expected_semi); - TryConsumeToken(tok::eod); + + bool DontSeeSemi = false; + if (getLangOpts().CPlusPlusModules) + DontSeeSemi = + ExpectAndConsumeSemi(diag::err_expected_semi_after_module_or_import, + tok::getKeywordSpelling(tok::kw_import)); + else + DontSeeSemi = ExpectAndConsumeSemi(diag::err_module_expected_semi); + + if (DontSeeSemi) + SkipUntil(tok::semi); if (SeenError) return nullptr; @@ -2554,29 +2533,16 @@ Decl *Parser::ParseModuleImport(SourceLocation AtLoc, bool Parser::ParseModuleName(SourceLocation UseLoc, SmallVectorImpl &Path, bool IsImport) { - // Parse the module path. - while (true) { - if (!Tok.is(tok::identifier)) { - if (Tok.is(tok::code_completion)) { - cutOffParsing(); - Actions.CodeCompletion().CodeCompleteModuleImport(UseLoc, Path); - return true; - } - - Diag(Tok, diag::err_module_expected_ident) << IsImport; - SkipUntil(tok::semi); - return true; - } - - // Record this part of the module path. 
- Path.emplace_back(Tok.getLocation(), Tok.getIdentifierInfo()); - ConsumeToken(); - - if (Tok.isNot(tok::period)) - return false; - - ConsumeToken(); + if (Tok.isNot(tok::annot_module_name)) { + SkipUntil(tok::semi); + return true; } + ModuleNameLoc *NameLoc = + static_cast(Tok.getAnnotationValue()); + Path.assign(NameLoc->getModuleIdPath().begin(), + NameLoc->getModuleIdPath().end()); + ConsumeAnnotationToken(); + return false; } bool Parser::parseMisplacedModuleImport() { diff --git a/clang/lib/Sema/SemaModule.cpp b/clang/lib/Sema/SemaModule.cpp index a2aa3eaaa7f6d..a2e3494cf3ed5 100644 --- a/clang/lib/Sema/SemaModule.cpp +++ b/clang/lib/Sema/SemaModule.cpp @@ -58,23 +58,6 @@ static void checkModuleImportContext(Sema &S, Module *M, } } -// We represent the primary and partition names as 'Paths' which are sections -// of the hierarchical access path for a clang module. However for C++20 -// the periods in a name are just another character, and we will need to -// flatten them into a string. -static std::string stringFromPath(ModuleIdPath Path) { - std::string Name; - if (Path.empty()) - return Name; - - for (auto &Piece : Path) { - if (!Name.empty()) - Name += "."; - Name += Piece.getIdentifierInfo()->getName(); - } - return Name; -} - /// Helper function for makeTransitiveImportsVisible to decide whether /// the \param Imported module unit is in the same module with the \param /// CurrentModule. @@ -305,7 +288,7 @@ Sema::ActOnModuleDecl(SourceLocation StartLoc, SourceLocation ModuleLoc, // We were asked to compile a module interface unit but this is a module // implementation unit. Diag(ModuleLoc, diag::err_module_interface_implementation_mismatch) - << FixItHint::CreateInsertion(ModuleLoc, "export "); + << FixItHint::CreateInsertion(ModuleLoc, "export "); MDK = ModuleDeclKind::Interface; break; @@ -372,10 +355,10 @@ Sema::ActOnModuleDecl(SourceLocation StartLoc, SourceLocation ModuleLoc, // Flatten the dots in a module name. 
Unlike Clang's hierarchical module map // modules, the dots here are just another character that can appear in a // module name. - std::string ModuleName = stringFromPath(Path); + std::string ModuleName = ModuleLoader::getFlatNameFromPath(Path); if (IsPartition) { ModuleName += ":"; - ModuleName += stringFromPath(Partition); + ModuleName += ModuleLoader::getFlatNameFromPath(Partition); } // If a module name was explicitly specified on the command line, it must be // correct. @@ -388,7 +371,7 @@ Sema::ActOnModuleDecl(SourceLocation StartLoc, SourceLocation ModuleLoc, << getLangOpts().CurrentModule; return nullptr; } - const_cast(getLangOpts()).CurrentModule = ModuleName; + const_cast(getLangOpts()).CurrentModule = ModuleName; auto &Map = PP.getHeaderSearchInfo().getModuleMap(); Module *Mod; // The module we are creating. @@ -433,7 +416,7 @@ Sema::ActOnModuleDecl(SourceLocation StartLoc, SourceLocation ModuleLoc, Interface = getModuleLoader().loadModule(ModuleLoc, {ModuleNameLoc}, Module::AllVisible, /*IsInclusionDirective=*/false); - const_cast(getLangOpts()).CurrentModule = ModuleName; + const_cast(getLangOpts()).CurrentModule = ModuleName; if (!Interface) { Diag(ModuleLoc, diag::err_module_not_defined) << ModuleName; @@ -596,12 +579,12 @@ DeclResult Sema::ActOnModuleImport(SourceLocation StartLoc, // otherwise, the name of the importing named module. 
ModuleName = NamedMod->getPrimaryModuleInterfaceName().str(); ModuleName += ":"; - ModuleName += stringFromPath(Path); + ModuleName += ModuleLoader::getFlatNameFromPath(Path); ModuleNameLoc = IdentifierLoc(Path[0].getLoc(), PP.getIdentifierInfo(ModuleName)); Path = ModuleIdPath(ModuleNameLoc); } else if (getLangOpts().CPlusPlusModules) { - ModuleName = stringFromPath(Path); + ModuleName = ModuleLoader::getFlatNameFromPath(Path); ModuleNameLoc = IdentifierLoc(Path[0].getLoc(), PP.getIdentifierInfo(ModuleName)); Path = ModuleIdPath(ModuleNameLoc); diff --git a/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp b/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp index d67178c153e88..23d89e88279ac 100644 --- a/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp +++ b/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp @@ -631,7 +631,8 @@ void ModuleDepCollectorPP::InclusionDirective( void ModuleDepCollectorPP::moduleImport(SourceLocation ImportLoc, ModuleIdPath Path, const Module *Imported) { - if (MDC.ScanInstance.getPreprocessor().isInImportingCXXNamedModules()) { + auto &PP = MDC.ScanInstance.getPreprocessor(); + if (PP.getLangOpts().CPlusPlusModules && PP.isImportingCXXNamedModules()) { P1689ModuleInfo RequiredModule; RequiredModule.ModuleName = Path[0].getIdentifierInfo()->getName().str(); RequiredModule.Type = P1689ModuleInfo::ModuleType::NamedCXXModule; diff --git a/clang/test/CXX/basic/basic.link/p3.cpp b/clang/test/CXX/basic/basic.link/p3.cpp index e6633a777ddef..e845a16f65952 100644 --- a/clang/test/CXX/basic/basic.link/p3.cpp +++ b/clang/test/CXX/basic/basic.link/p3.cpp @@ -13,7 +13,8 @@ struct module { struct inner {}; }; constexpr int n = 123; export module m; // #1 -module y = {}; // expected-error {{multiple module declarations}} expected-error 2{{}} +module y = {}; // expected-error {{multiple module declarations}} +// expected-error@-1 {{module directive must end with a ';'}} // expected-note@#1 {{previous module 
declaration}} ::import x = {}; @@ -23,8 +24,8 @@ import::inner xi = {}; module::inner yi = {}; namespace N { - module a; - import b; + module a; // expected-error {{module declaration can only appear at the top level}} + import b; // expected-error {{import declaration can only appear at the top level}} } extern "C++" module cxxm; @@ -45,10 +46,11 @@ constexpr int n = 123; export module m; // #1 -import x = {}; // expected-error {{expected ';' after module name}} +import x = {}; // expected-error {{import directive must end with a ';'}} // expected-error@-1 {{module 'x' not found}} //--- ImportError2.cpp +// expected-no-diagnostics module; struct module { struct inner {}; }; @@ -63,7 +65,4 @@ template<> struct import { static X y; }; -// This is not valid because the 'import ' is a pp-import, even though it -// grammatically can't possibly be an import declaration. -struct X {} import::y; // expected-error {{'n' file not found}} - +struct X {} import::y; diff --git a/clang/test/CXX/basic/basic.scope/basic.scope.namespace/p2.cpp b/clang/test/CXX/basic/basic.scope/basic.scope.namespace/p2.cpp index fd0038b3f7745..a57919f48afdd 100644 --- a/clang/test/CXX/basic/basic.scope/basic.scope.namespace/p2.cpp +++ b/clang/test/CXX/basic/basic.scope/basic.scope.namespace/p2.cpp @@ -107,4 +107,4 @@ void test_late() { // expected-error@-2 {{undeclared identifier}} internal_private = 1; // expected-error {{use of undeclared identifier 'internal_private'}} -} \ No newline at end of file +} diff --git a/clang/test/CXX/lex/lex.pptoken/p3-2a.cpp b/clang/test/CXX/lex/lex.pptoken/p3-2a.cpp index 0e0e5fec6e9d8..81af65481dc22 100644 --- a/clang/test/CXX/lex/lex.pptoken/p3-2a.cpp +++ b/clang/test/CXX/lex/lex.pptoken/p3-2a.cpp @@ -1,7 +1,7 @@ // RUN: not %clang_cc1 -std=c++2a -E -I%S/Inputs %s -o - | FileCheck %s --strict-whitespace --implicit-check-not=ERROR // Check for context-sensitive header-name token formation. 
-// CHECK: import ; +// CHECK: __preprocessed_import ; import ; // Not at the top level: these are each 8 tokens rather than 5. @@ -12,59 +12,64 @@ import ; // CHECK: [ import ; %> [ import ; %> -// CHECK: import ; +// CHECK: __preprocessed_import ; import ; -// CHECK: foo; import ; +// CHECK: foo; import ; foo; import ; // CHECK: foo import ; foo import ; -// CHECK: import {{\[\[ ]]}}; +// CHECK: __preprocessed_import {{\[\[ ]]}}; import [[ ]]; -// CHECK: import import ; +// CHECK: __preprocessed_import import ; import import ; // FIXME: We do not form header-name tokens in the pp-import-suffix of a // pp-import. Conforming programs can't tell the difference. -// CHECK: import {} import ; +// CHECK: __preprocessed_import {} import ; // FIXME: import {} import ; import {} import ; -// CHECK: export import ; +// CHECK: export __preprocessed_import ; export import ; // CHECK: export export import ; export export import ; #define UNBALANCED_PAREN ( -// CHECK: import ; +// CHECK: __preprocessed_import ; import ; UNBALANCED_PAREN -// CHECK: import ; +// CHECK: __preprocessed_import ; import ; ) _Pragma("clang no_such_pragma ("); -// CHECK: import ; +// CHECK: __preprocessed_import ; import ; #define HEADER -// CHECK: import ; +// CHECK: __preprocessed_import ; import HEADER; -// CHECK: import ; +// CHECK: {{^}}foo{{$}} +// CHECK-NEXT: {{^}} bar{{$}} +// CHECK-NEXT: {{^}}>;{{$}} import < foo bar >; // CHECK: import{{$}} -// CHECK: {{^}}; +// CHECK-NEXT: {{^}}<{{$}} +// CHECK-NEXT: {{^}}foo{{$}} +// CHECK-NEXT: {{^}} bar{{$}} +// CHECK-NEXT: {{^}}>;{{$}} import < foo @@ -72,7 +77,7 @@ foo >; // CHECK: import{{$}} -// CHECK: {{^}}; +// CHECK: {{^}}; import ; diff --git a/clang/test/CXX/module/basic/basic.link/module-declaration.cpp b/clang/test/CXX/module/basic/basic.link/module-declaration.cpp index 4bdcc9e5f278e..d4265b0f8dd8e 100644 --- a/clang/test/CXX/module/basic/basic.link/module-declaration.cpp +++ b/clang/test/CXX/module/basic/basic.link/module-declaration.cpp @@ 
-46,8 +46,7 @@ export module z; export module x; //--- invalid_module_name.cppm -export module z elderberry; // expected-error {{expected ';'}} \ - // expected-error {{a type specifier is required}} +export module z elderberry; // expected-error {{module directive must end with a ';'}} //--- empty_attribute.cppm // expected-no-diagnostics diff --git a/clang/test/CXX/module/cpp.pre/p1.cpp b/clang/test/CXX/module/cpp.pre/p1.cpp new file mode 100644 index 0000000000000..059c2e19141a3 --- /dev/null +++ b/clang/test/CXX/module/cpp.pre/p1.cpp @@ -0,0 +1,231 @@ +// RUN: rm -rf %t +// RUN: mkdir %t +// RUN: split-file %s %t + +// RUN: %clang_cc1 -std=c++20 %t/hash.cpp -fsyntax-only -verify +// RUN: %clang_cc1 -std=c++20 %t/module.cpp -fsyntax-only -verify + +// RUN: %clang_cc1 -std=c++20 %t/rightpad.cppm -emit-module-interface -o %t/rightpad.pcm +// RUN: %clang_cc1 -std=c++20 %t/M_part.cppm -emit-module-interface -o %t/M_part.pcm +// RUN: %clang_cc1 -std=c++20 -xc++-system-header %t/string -emit-header-unit -o %t/string.pcm +// RUN: %clang_cc1 -std=c++20 -xc++-user-header %t/squee -emit-header-unit -o %t/squee.pcm +// RUN: %clang_cc1 -std=c++20 %t/import.cpp -isystem %t \ +// RUN: -fmodule-file=rightpad=%t/rightpad.pcm \ +// RUN: -fmodule-file=M:part=%t/M_part.pcm \ +// RUN: -fmodule-file=%t/string.pcm \ +// RUN: -fmodule-file=%t/squee.pcm \ +// RUN: -fsyntax-only -verify + +// RUN: %clang_cc1 -std=c++20 %t/module_decl_not_in_same_line.cpp -fsyntax-only -verify +// RUN: %clang_cc1 -std=c++20 %t/foo.cppm -emit-module-interface -o %t/foo.pcm +// RUN: %clang_cc1 -std=c++20 %t/import_decl_not_in_same_line.cpp -fmodule-file=foo=%t/foo.pcm -fsyntax-only -verify +// RUN: %clang_cc1 -std=c++20 %t/not_import.cpp -fsyntax-only -verify +// RUN: %clang_cc1 -std=c++20 %t/import_spaceship.cpp -fsyntax-only -verify +// RUN: %clang_cc1 -std=c++20 %t/leading_empty_macro.cpp -fsyntax-only -verify +// RUN: %clang_cc1 -std=c++20 %t/operator_keyword_and.cpp -fsyntax-only -verify +// RUN: 
%clang_cc1 -std=c++20 %t/operator_keyword_and2.cpp -fsyntax-only -verify +// RUN: %clang_cc1 -std=c++20 %t/macro_in_module_decl_suffix.cpp -D'ATTR(X)=[[X]]' -fsyntax-only -verify +// RUN: %clang_cc1 -std=c++20 %t/macro_in_module_decl_suffix2.cpp -D'ATTR(X)=[[X]]' -fsyntax-only -verify +// RUN: %clang_cc1 -std=c++20 %t/extra_tokens_after_module_decl1.cpp -fsyntax-only -verify +// RUN: %clang_cc1 -std=c++20 %t/extra_tokens_after_module_decl2.cpp -fsyntax-only -verify +// RUN: %clang_cc1 -std=c++20 %t/object_like_macro_in_module_name.cpp -Dm=x -Dn=y -fsyntax-only -verify +// RUN: %clang_cc1 -std=c++20 %t/object_like_macro_in_partition_name.cpp -Dm=x -Dn=y -fsyntax-only -verify +// RUN: %clang_cc1 -std=c++20 %t/unexpected_character_in_pp_module_suffix.cpp -D'm(x)=x' -fsyntax-only -verify +// RUN: %clang_cc1 -std=c++20 %t/semi_in_same_line.cpp -fsyntax-only -verify +// RUN: %clang_cc1 -std=c++20 %t/cwg2947_example1.cpp -D'DOT_BAR=.bar' -fsyntax-only -verify +// RUN: %clang_cc1 -std=c++20 %t/cwg2947_example2.cpp -D'MOD_ATTR=[[vendor::shiny_module]]' -fsyntax-only -verify +// RUN: %clang_cc1 -std=c++20 %t/cwg2947_example3.cpp -fsyntax-only -verify +// RUN: %clang_cc1 -std=c++20 %t/cwg2947_example4.cpp -fsyntax-only -verify +// RUN: %clang_cc1 -std=c++20 %t/cwg2947_example5.cpp -fsyntax-only -verify +// RUN: %clang_cc1 -std=c++20 %t/cwg2947_example6.cpp -fsyntax-only -verify +// RUN: %clang_cc1 -std=c++20 %t/cwg2947_ext1.cpp -verify -E | FileCheck %t/cwg2947_ext1.cpp +// RUN: %clang_cc1 -std=c++20 %t/cwg2947_ext2.cpp -fsyntax-only -verify +// RUN: %clang_cc1 -std=c++20 %t/cwg2947_ext3.cpp -fsyntax-only -verify +// RUN: %clang_cc1 -std=c++20 %t/preprocessed_module_file.cpp -E | FileCheck %t/preprocessed_module_file.cpp +// RUN: %clang_cc1 -std=c++20 %t/pedantic-errors.cpp -pedantic-errors -fsyntax-only -verify + + +//--- hash.cpp +// expected-no-diagnostics +# // preprocessing directive + +//--- module.cpp +// expected-no-diagnostics +module ; // preprocessing directive 
+export module leftpad; // preprocessing directive + +//--- string +#ifndef STRING_H +#define STRING_H +#endif // STRING_H + +//--- squee +#ifndef SQUEE_H +#define SQUEE_H +#endif + +//--- rightpad.cppm +export module rightpad; + +//--- M_part.cppm +export module M:part; + +//--- import.cpp +export module M; +import <string>; // expected-warning {{the implementation of header units is in an experimental phase}} +export import "squee"; // expected-warning {{the implementation of header units is in an experimental phase}} +import rightpad; // preprocessing directive +import :part; // preprocessing directive + +//--- module_decl_not_in_same_line.cpp +module // expected-error {{a type specifier is required for all declarations}} +;export module M; // expected-error {{export declaration can only be used within a module interface}} \ + // expected-error {{unknown type name 'module'}} + +//--- foo.cppm +export module foo; + +//--- import_decl_not_in_same_line.cpp +export module M; +export +import // expected-error {{unknown type name 'import'}} +foo; + +export +import foo; // expected-error {{unknown type name 'import'}} + +//--- not_import.cpp +export module M; +import :: // expected-error {{use of undeclared identifier 'import'}} +import -> // expected-error {{cannot use arrow operator on a type}} + +//--- import_spaceship.cpp +export module M; +import <=>; // expected-error {{'=' file not found}} + +//--- leading_empty_macro.cpp +// expected-no-diagnostics +export module M; +typedef int import; +#define EMP +EMP import m; // The phase 7 grammar should see import as a typedef-name. 
+ +//--- operator_keyword_and.cpp +// expected-no-diagnostics +typedef int import; +extern +import and x; + +//--- operator_keyword_and2.cpp +// expected-no-diagnostics +typedef int module; +extern +module and x; + +//--- macro_in_module_decl_suffix.cpp +export module m ATTR(x); // expected-warning {{unknown attribute 'x' ignored}} + +//--- macro_in_module_decl_suffix2.cpp +export module m [[y]] ATTR(x); // expected-warning {{unknown attribute 'y' ignored}} \ + // expected-warning {{unknown attribute 'x' ignored}} + +//--- extra_tokens_after_module_decl1.cpp +module; int n; // expected-warning {{extra tokens at end of 'module' directive}} +import foo; int n1; // expected-warning {{extra tokens at end of 'import' directive}} + // expected-error@-1 {{module 'foo' not found}} +const int *p1 = &n1; + + +//--- extra_tokens_after_module_decl2.cpp +export module m; int n2 // expected-warning {{extra tokens at end of 'module' directive}} +; +const int *p2 = &n2; + + +//--- object_like_macro_in_module_name.cpp +export module m.n; +// expected-error@-1 {{module name component 'm' cannot be a object-like macro}} +// expected-note@* {{macro 'm' defined here}} +// expected-error@-3 {{module name component 'n' cannot be a object-like macro}} +// expected-note@* {{macro 'n' defined here}} + +//--- object_like_macro_in_partition_name.cpp +export module m:n; +// expected-error@-1 {{module name component 'm' cannot be a object-like macro}} +// expected-note@* {{macro 'm' defined here}} +// expected-error@-3 {{partition name component 'n' cannot be a object-like macro}} +// expected-note@* {{macro 'n' defined here}} + +//--- unexpected_character_in_pp_module_suffix.cpp +export module m(); +// expected-error@-1 {{module directive must end with a ';'}} + +//--- semi_in_same_line.cpp +export module m // OK +[[]]; + +import foo // expected-error {{module 'foo' not found}} +; + +//--- cwg2947_example1.cpp +export module foo DOT_BAR; // error: expansion of DOT_BAR; does not begin with ; or 
[ +// expected-error@-1 {{module directive must end with a ';'}} + +//--- cwg2947_example2.cpp +export module M MOD_ATTR ; // OK +// expected-warning@-1 {{unknown attribute 'vendor::shiny_module' ignored}} + +//--- cwg2947_example3.cpp +export module a + .b; // error: preprocessing token after pp-module-name is not ; or [ +// expected-error@-2 {{module directive must end with a ';'}} + +//--- cwg2947_example4.cpp +export module M [[ + attr1, + attr2 ]] ; // OK +// expected-warning@-2 {{unknown attribute 'attr1' ignored}} +// expected-warning@-2 {{unknown attribute 'attr2' ignored}} + +//--- cwg2947_example5.cpp +export module M + [[ attr1, + attr2 ]] ; // OK +// expected-warning@-2 {{unknown attribute 'attr1' ignored}} +// expected-warning@-2 {{unknown attribute 'attr2' ignored}} + +//--- cwg2947_example6.cpp +export module M; int // expected-warning {{extra tokens at end of 'module' directive}} + n; // OK + +//--- cwg2947_ext1.cpp +// CHECK: export __preprocessed_module m; int x; +// CHECK-NEXT: extern "C++" int *y = &x; +export module m; int x; // expected-warning {{extra tokens at end of 'module' directive}} +extern "C++" int *y = &x; + +//--- cwg2947_ext2.cpp +export module x _Pragma("GCC warning \"Hi\""); // expected-warning {{Hi}} + +//--- cwg2947_ext3.cpp +export module x; _Pragma("GCC warning \"hi\""); // expected-warning {{hi}} +// expected-warning@-1 {{extra tokens at end of 'module' directive}} + +//--- preprocessed_module_file.cpp +// CHECK: __preprocessed_module; +// CHECK-NEXT: export __preprocessed_module M; +// CHECK-NEXT: __preprocessed_import std; +// CHECK-NEXT: export __preprocessed_import bar; +// CHECK-NEXT: struct import {}; +// CHECK-EMPTY: +// CHECK-NEXT: import foo; +module; +export module M; +import std; +export import bar; +struct import {}; +#define EMPTY +EMPTY import foo; + +//--- pedantic-errors.cpp +export module m; int n; // expected-warning {{extra tokens at end of 'module' directive}} diff --git 
a/clang/test/CXX/module/dcl.dcl/dcl.module/dcl.module.import/p1.cppm b/clang/test/CXX/module/dcl.dcl/dcl.module/dcl.module.import/p1.cppm index f65f050a3c7bd..28fb1827eed3b 100644 --- a/clang/test/CXX/module/dcl.dcl/dcl.module/dcl.module.import/p1.cppm +++ b/clang/test/CXX/module/dcl.dcl/dcl.module/dcl.module.import/p1.cppm @@ -44,8 +44,8 @@ import x [[noreturn]]; // expected-error {{'noreturn' attribute cannot be applie import x [[blarg::noreturn]]; // expected-warning-re {{unknown attribute 'blarg::noreturn' ignored{{.*}}}} import x.y; -import x.; // expected-error {{expected a module name after 'import'}} -import .x; // expected-error {{expected a module name after 'import'}} +import x.; // expected-error {{expected identifier after '.' in module name}} +import .x; // expected-error {{unknown type name 'import'}} expected-error {{cannot use dot operator on a type}} import blarg; // expected-error {{module 'blarg' not found}} @@ -62,8 +62,8 @@ import x [[noreturn]]; // expected-error {{'noreturn' attribute cannot be applie import x [[blarg::noreturn]]; // expected-warning-re {{unknown attribute 'blarg::noreturn' ignored{{.*}}}} import x.y; -import x.; // expected-error {{expected a module name after 'import'}} -import .x; // expected-error {{expected a module name after 'import'}} +import x.; // expected-error {{expected identifier after '.' 
in module name}} +import .x; // expected-error {{unknown type name 'import'}} expected-error {{cannot use dot operator on a type}} import blarg; // expected-error {{module 'blarg' not found}} diff --git a/clang/test/Lexer/cxx20-module-directive.cpp b/clang/test/Lexer/cxx20-module-directive.cpp new file mode 100644 index 0000000000000..e420ff4b11407 --- /dev/null +++ b/clang/test/Lexer/cxx20-module-directive.cpp @@ -0,0 +1,11 @@ +// RUN: %clang_cc1 -E -std=c++20 %s | FileCheck %s + +// CHECK: export __preprocessed_module M; +// CHECK-NEXT: export __preprocessed_import K; +// CHECK-NEXT: typedef int import; +// CHECK: import m; +export module M; +export import K; +typedef int import; +#define EMP +EMP import m; diff --git a/clang/test/Modules/pr121066.cpp b/clang/test/Modules/pr121066.cpp index e92a81c53d683..676c5225f2090 100644 --- a/clang/test/Modules/pr121066.cpp +++ b/clang/test/Modules/pr121066.cpp @@ -1,4 +1,6 @@ // RUN: %clang_cc1 -std=c++20 -fsyntax-only %s -verify -import mod // expected-error {{expected ';' after module name}} +// This import directive is ill-formed: it is missing a ';' after the +// module name, but we try to recover from the error and import the module. 
+import mod // expected-error {{import directive must end with a ';'}} // expected-error@-1 {{module 'mod' not found}} diff --git a/clang/test/Modules/preprocess-named-modules.cppm b/clang/test/Modules/preprocess-named-modules.cppm index 67a6cc384a1c7..5feb1772c145b 100644 --- a/clang/test/Modules/preprocess-named-modules.cppm +++ b/clang/test/Modules/preprocess-named-modules.cppm @@ -4,4 +4,4 @@ // RUN: %clang_cc1 -std=c++20 -E %s -o - | FileCheck %s import non_exist_modules; -// CHECK: import non_exist_modules; +// CHECK: __preprocessed_import non_exist_modules; diff --git a/clang/unittests/ASTMatchers/ASTMatchersNodeTest.cpp b/clang/unittests/ASTMatchers/ASTMatchersNodeTest.cpp index 9692d6e6fae97..af6bb4d9e0d43 100644 --- a/clang/unittests/ASTMatchers/ASTMatchersNodeTest.cpp +++ b/clang/unittests/ASTMatchers/ASTMatchersNodeTest.cpp @@ -193,7 +193,8 @@ TEST_P(ASTMatchersTest, ExportDecl) { if (!GetParam().isCXX20OrLater()) { return; } - const std::string moduleHeader = "module;export module ast_matcher_test;"; + const std::string moduleHeader = + "module;\n export module ast_matcher_test;\n"; EXPECT_TRUE(matches(moduleHeader + "export void foo();", exportDecl(has(functionDecl())))); EXPECT_TRUE(matches(moduleHeader + "export { void foo(); int v; }", diff --git a/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp b/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp index ddc87921ea084..79e2832798917 100644 --- a/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp +++ b/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp @@ -640,7 +640,7 @@ TEST(MinimizeSourceToDependencyDirectivesTest, AtImport) { EXPECT_STREQ("@import A;\n", Out.data()); ASSERT_FALSE(minimizeSourceToDependencyDirectives("@import A\n;", Out)); - EXPECT_STREQ("@import A\n;\n", Out.data()); + EXPECT_STREQ("@import A;\n", Out.data()); ASSERT_FALSE(minimizeSourceToDependencyDirectives("@import A.B;\n", Out)); EXPECT_STREQ("@import A.B;\n", Out.data()); @@ -685,18 +685,19 @@ 
TEST(MinimizeSourceToDependencyDirectivesTest, ImportFailures) { minimizeSourceToDependencyDirectives("@import MACRO(A);\n", Out)); ASSERT_FALSE(minimizeSourceToDependencyDirectives("@import \" \";\n", Out)); - ASSERT_FALSE(minimizeSourceToDependencyDirectives("import \n" + ASSERT_FALSE(minimizeSourceToDependencyDirectives("import ;\n" "@import Foo;", Out)); - EXPECT_STREQ("@import Foo;\n", Out.data()); + EXPECT_STREQ("import;\n@import Foo;\n", Out.data()); ASSERT_FALSE( - minimizeSourceToDependencyDirectives("import \n" + minimizeSourceToDependencyDirectives("import ;\n" "#import \n" "@;\n" "#pragma clang module import Foo", Out)); - EXPECT_STREQ("#import \n" + EXPECT_STREQ("import;\n" + "#import \n" "#pragma clang module import Foo\n", Out.data()); } @@ -1215,4 +1216,41 @@ TEST(MinimizeSourceToDependencyDirectivesTest, TokensBeforeEOF) { EXPECT_STREQ("#ifndef A\n#define A\n#endif\n\n", Out.data()); } +TEST(MinimizeSourceToDependencyDirectivesTest, PreprocessedModule) { + SmallVector Out; + + ASSERT_FALSE( + minimizeSourceToDependencyDirectives("export __preprocessed_module M;\n" + "struct import {};\n" + "import foo;\n" + "__preprocessed_import bar;\n", + Out)); + EXPECT_STREQ("export __preprocessed_module M;\n" + "__preprocessed_import bar;\n", + Out.data()); +} + +TEST(MinimizeSourceToDependencyDirectivesTest, ScanningPreprocessedModuleFile) { + StringRef Source = R"( + export __preprocessed_module M; + struct import {}; + import foo; + )"; + + ASSERT_TRUE(clang::isPreprocessedModuleFile(Source)); + + Source = R"( + export module M; + struct import {}; + import foo; + )"; + + ASSERT_FALSE(clang::isPreprocessedModuleFile(Source)); + + Source = R"( + __preprocessed_import foo; + )"; + ASSERT_TRUE(clang::isPreprocessedModuleFile(Source)); +} + } // end anonymous namespace diff --git a/clang/unittests/Lex/ModuleDeclStateTest.cpp b/clang/unittests/Lex/ModuleDeclStateTest.cpp index ac2ddfaf52cd0..3117c4f2f1af0 100644 --- a/clang/unittests/Lex/ModuleDeclStateTest.cpp 
+++ b/clang/unittests/Lex/ModuleDeclStateTest.cpp @@ -40,7 +40,7 @@ class CheckNamedModuleImportingCB : public PPCallbacks { void moduleImport(SourceLocation ImportLoc, ModuleIdPath Path, const Module *Imported) override { ASSERT_TRUE(NextCheckingIndex < IsImportingNamedModulesAssertions.size()); - EXPECT_EQ(PP.isInImportingCXXNamedModules(), + EXPECT_EQ(PP.isImportingCXXNamedModules(), IsImportingNamedModulesAssertions[NextCheckingIndex]); NextCheckingIndex++; diff --git a/clang/www/cxx_status.html b/clang/www/cxx_status.html index a35e50150a2ab..bbef39d0ea240 100755 --- a/clang/www/cxx_status.html +++ b/clang/www/cxx_status.html @@ -910,7 +910,7 @@

C++20 implementation status

P1703R1 - Subsumed by P1857 + Subsumed by P1857 P1874R1 @@ -926,7 +926,7 @@

C++20 implementation status

P1857R3 - No + Clang 21 P2115R0