Skip to content

Commit f650b8a

Browse files
committed
[clang][modules] Lazily load by name lookups in module maps
Instead of eagerly populating the `clang::ModuleMap` when looking up a module by name, this patch changes `HeaderSearch` to only load the modules that are actually used. This introduces `ModuleMap::findOrLoadModule` which will load modules from parsed but not loaded module maps. This cannot be used anywhere that the module loading code calls into as it can create infinite recursion. This currently just reparses module maps when looking up a module by header. This is fine as redeclarations are allowed from the same file, but future patches will also make looking up a module by header lazy. This patch changes the shadow.m test to use explicitly built modules and `#import`. This test and the shadow feature are very brittle and do not work in general. The test relied on pcm files being left behind by prior failing clang invocations that were then reused by the last invocation. If you clean the cache then the last invocation will always fail. This is because the input module map and the `-fmodule-map-file=` module map are parsed in the same module scope, and `-fmodule-map-file=` is forwarded to implicit module builds. That means you are guaranteed to hit a module redeclaration error if the TU actually imports the module it is trying to shadow. This patch changes when we load A2's module map to after the `A` module has been loaded, which sets the `IsFromModuleFile` bit on `A`. This means that A2's `A` is skipped entirely instead of creating a shadow module, and we get textual inclusion. It is possible to construct a case where this would happen before this patch too. An upcoming patch in this series will rework shadowing to work in the general case, but that's only possible once header -> module lookup is lazy too.
1 parent 4c68061 commit f650b8a

17 files changed

+354
-39
lines changed

clang/include/clang/Basic/DiagnosticGroups.td

+1
Original file line numberDiff line numberDiff line change
@@ -576,6 +576,7 @@ def ModuleImport : DiagGroup<"module-import">;
576576
def ModuleConflict : DiagGroup<"module-conflict">;
577577
def ModuleFileExtension : DiagGroup<"module-file-extension">;
578578
def ModuleIncludeDirectiveTranslation : DiagGroup<"module-include-translation">;
579+
def ModuleMap : DiagGroup<"module-map">;
579580
def RoundTripCC1Args : DiagGroup<"round-trip-cc1-args">;
580581
def NewlineEOF : DiagGroup<"newline-eof">;
581582
def Nullability : DiagGroup<"nullability">;

clang/include/clang/Basic/DiagnosticLexKinds.td

+6
Original file line numberDiff line numberDiff line change
@@ -836,6 +836,12 @@ def warn_pp_date_time : Warning<
836836
ShowInSystemHeader, DefaultIgnore, InGroup<DiagGroup<"date-time">>;
837837

838838
// Module map parsing
839+
def remark_mmap_parse : Remark<
840+
"parsing modulemap '%0'">, ShowInSystemHeader, InGroup<ModuleMap>;
841+
def remark_mmap_load : Remark<
842+
"loading modulemap '%0'">, ShowInSystemHeader, InGroup<ModuleMap>;
843+
def remark_mmap_load_module : Remark<
844+
"loading parsed module '%0'">, ShowInSystemHeader, InGroup<ModuleMap>;
839845
def err_mmap_unknown_token : Error<"skipping stray token">;
840846
def err_mmap_expected_module : Error<"expected module declaration">;
841847
def err_mmap_expected_module_name : Error<"expected module name">;

clang/include/clang/Lex/HeaderSearch.h

+39-2
Original file line numberDiff line numberDiff line change
@@ -332,13 +332,24 @@ class HeaderSearch {
332332
/// The mapping between modules and headers.
333333
mutable ModuleMap ModMap;
334334

335+
enum ModuleMapDirectoryState {
336+
MMDS_Parsed,
337+
MMDS_Loaded,
338+
MMDS_Invalid,
339+
};
340+
335341
/// Describes whether a given directory has a module map in it.
336-
llvm::DenseMap<const DirectoryEntry *, bool> DirectoryHasModuleMap;
342+
llvm::DenseMap<const DirectoryEntry *, ModuleMapDirectoryState>
343+
DirectoryHasModuleMap;
337344

338345
/// Set of module map files we've already loaded, and a flag indicating
339346
/// whether they were valid or not.
340347
llvm::DenseMap<const FileEntry *, bool> LoadedModuleMaps;
341348

349+
/// Set of module map files we've already pre-parsed, and a flag indicating
350+
/// whether they were valid or not.
351+
llvm::DenseMap<const FileEntry *, bool> PreParsedModuleMaps;
352+
342353
// A map of discovered headers with their associated include file name.
343354
llvm::DenseMap<const FileEntry *, llvm::SmallString<64>> IncludeNames;
344355

@@ -435,7 +446,7 @@ class HeaderSearch {
435446

436447
/// Consider modules when including files from this directory.
437448
void setDirectoryHasModuleMap(const DirectoryEntry* Dir) {
438-
DirectoryHasModuleMap[Dir] = true;
449+
DirectoryHasModuleMap[Dir] = MMDS_Loaded;
439450
}
440451

441452
/// Forget everything we know about headers so far.
@@ -717,6 +728,23 @@ class HeaderSearch {
717728
unsigned *Offset = nullptr,
718729
StringRef OriginalModuleMapFile = StringRef());
719730

731+
/// Read the contents of the given module map file.
732+
///
733+
/// \param File The module map file.
734+
/// \param IsSystem Whether this file is in a system header directory.
735+
/// \param ID If the module map file is already mapped (perhaps as part of
736+
/// processing a preprocessed module), the ID of the file.
737+
/// \param Offset [inout] An offset within ID to start parsing. On exit,
738+
/// filled by the end of the parsed contents (either EOF or the
739+
/// location of an end-of-module-map pragma).
740+
/// \param OriginalModuleMapFile The original path to the module map file,
741+
/// used to resolve paths within the module (this is required when
742+
/// building the module from preprocessed source).
743+
/// \returns true if an error occurred, false otherwise.
744+
bool preLoadModuleMapFile(FileEntryRef File, bool IsSystem,
745+
FileID ID = FileID(),
746+
StringRef OriginalModuleMapFile = StringRef());
747+
720748
/// Collect the set of all known, top-level modules.
721749
///
722750
/// \param Modules Will be filled with the set of known, top-level modules.
@@ -936,6 +964,10 @@ class HeaderSearch {
936964
FileID ID = FileID(),
937965
unsigned *Offset = nullptr);
938966

967+
LoadModuleMapResult parseModuleMapFileImpl(FileEntryRef File, bool IsSystem,
968+
DirectoryEntryRef Dir,
969+
FileID ID = FileID());
970+
939971
/// Try to load the module map file in the given directory.
940972
///
941973
/// \param DirName The name of the directory where we will look for a module
@@ -958,6 +990,11 @@ class HeaderSearch {
958990
/// named directory.
959991
LoadModuleMapResult loadModuleMapFile(DirectoryEntryRef Dir, bool IsSystem,
960992
bool IsFramework);
993+
994+
LoadModuleMapResult parseModuleMapFile(StringRef DirName, bool IsSystem,
995+
bool IsFramework);
996+
LoadModuleMapResult parseModuleMapFile(DirectoryEntryRef Dir, bool IsSystem,
997+
bool IsFramework);
961998
};
962999

9631000
/// Apply the header search options to get given HeaderSearch object.

clang/include/clang/Lex/ModuleMap.h

+20
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
#include "clang/Basic/LangOptions.h"
1919
#include "clang/Basic/Module.h"
2020
#include "clang/Basic/SourceLocation.h"
21+
#include "clang/Lex/ModuleMapFile.h"
2122
#include "llvm/ADT/ArrayRef.h"
2223
#include "llvm/ADT/DenseMap.h"
2324
#include "llvm/ADT/DenseSet.h"
@@ -267,6 +268,18 @@ class ModuleMap {
267268
/// Describes whether we haved parsed a particular file as a module
268269
/// map.
269270
llvm::DenseMap<const FileEntry *, bool> ParsedModuleMap;
271+
llvm::DenseMap<const FileEntry *, const modulemap::ModuleMapFile *>
272+
PreParsedModuleMap;
273+
274+
std::vector<std::unique_ptr<modulemap::ModuleMapFile>> PreParsedModuleMaps;
275+
276+
/// Map from top level module name to a list of ModuleDecls in the order they
277+
/// were discovered. This allows handling shadowing correctly and diagnosing
278+
/// redefinitions.
279+
llvm::StringMap<SmallVector<std::pair<const modulemap::ModuleMapFile *,
280+
const modulemap::ModuleDecl *>,
281+
1>>
282+
PreParsedModules;
270283

271284
/// Resolve the given export declaration into an actual export
272285
/// declaration.
@@ -483,6 +496,8 @@ class ModuleMap {
483496
/// \returns The named module, if known; otherwise, returns null.
484497
Module *findModule(StringRef Name) const;
485498

499+
Module *findOrLoadModule(StringRef Name);
500+
486501
Module *findOrInferSubmodule(Module *Parent, StringRef Name);
487502

488503
/// Retrieve a module with the given name using lexical name lookup,
@@ -698,6 +713,11 @@ class ModuleMap {
698713
void addHeader(Module *Mod, Module::Header Header,
699714
ModuleHeaderRole Role, bool Imported = false);
700715

716+
/// Parse a module map without creating `clang::Module` instances.
717+
bool preParseModuleMapFile(FileEntryRef File, bool IsSystem,
718+
DirectoryEntryRef Dir, FileID ID = FileID(),
719+
SourceLocation ExternModuleLoc = SourceLocation());
720+
701721
/// Parse the given module map file, and record any modules we
702722
/// encounter.
703723
///

clang/include/clang/Lex/ModuleMapFile.h

+9
Original file line numberDiff line numberDiff line change
@@ -133,8 +133,17 @@ using TopLevelDecl = std::variant<ModuleDecl, ExternModuleDecl>;
133133
/// This holds many reference types (StringRef, SourceLocation, etc.) whose
134134
/// lifetimes are bound by the SourceManager and FileManager used.
135135
struct ModuleMapFile {
136+
/// The FileID used to parse this module map. This is always a local ID.
137+
FileID ID;
138+
139+
/// The directory in which the module map was discovered. Declarations in
140+
/// the module map are relative to this directory.
141+
OptionalDirectoryEntryRef Dir;
142+
136143
/// Beginning of the file, used for moduleMapFileRead callback.
137144
SourceLocation Start;
145+
146+
bool IsSystem;
138147
std::vector<TopLevelDecl> Decls;
139148

140149
void dump(llvm::raw_ostream &out) const;

clang/lib/Frontend/CompilerInstance.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -579,13 +579,13 @@ struct ReadModuleNames : ASTReaderListener {
579579
ModuleMap &MM = PP.getHeaderSearchInfo().getModuleMap();
580580
for (const std::string &LoadedModule : LoadedModules)
581581
MM.cacheModuleLoad(*PP.getIdentifierInfo(LoadedModule),
582-
MM.findModule(LoadedModule));
582+
MM.findOrLoadModule(LoadedModule));
583583
LoadedModules.clear();
584584
}
585585

586586
void markAllUnavailable() {
587587
for (const std::string &LoadedModule : LoadedModules) {
588-
if (Module *M = PP.getHeaderSearchInfo().getModuleMap().findModule(
588+
if (Module *M = PP.getHeaderSearchInfo().getModuleMap().findOrLoadModule(
589589
LoadedModule)) {
590590
M->HasIncompatibleModuleFile = true;
591591

clang/lib/Lex/HeaderSearch.cpp

+89-14
Original file line numberDiff line numberDiff line change
@@ -300,7 +300,7 @@ Module *HeaderSearch::lookupModule(StringRef ModuleName,
300300
SourceLocation ImportLoc, bool AllowSearch,
301301
bool AllowExtraModuleMapSearch) {
302302
// Look in the module map to determine if there is a module by this name.
303-
Module *Module = ModMap.findModule(ModuleName);
303+
Module *Module = ModMap.findOrLoadModule(ModuleName);
304304
if (Module || !AllowSearch || !HSOpts->ImplicitModuleMaps)
305305
return Module;
306306

@@ -360,11 +360,11 @@ Module *HeaderSearch::lookupModule(StringRef ModuleName, StringRef SearchName,
360360
// checked
361361
DirectoryEntryRef NormalDir = *Dir.getDirRef();
362362
// Search for a module map file in this directory.
363-
if (loadModuleMapFile(NormalDir, IsSystem,
364-
/*IsFramework*/false) == LMM_NewlyLoaded) {
363+
if (parseModuleMapFile(NormalDir, IsSystem,
364+
/*IsFramework*/ false) == LMM_NewlyLoaded) {
365365
// We just loaded a module map file; check whether the module is
366366
// available now.
367-
Module = ModMap.findModule(ModuleName);
367+
Module = ModMap.findOrLoadModule(ModuleName);
368368
if (Module)
369369
break;
370370
}
@@ -374,10 +374,10 @@ Module *HeaderSearch::lookupModule(StringRef ModuleName, StringRef SearchName,
374374
SmallString<128> NestedModuleMapDirName;
375375
NestedModuleMapDirName = Dir.getDirRef()->getName();
376376
llvm::sys::path::append(NestedModuleMapDirName, ModuleName);
377-
if (loadModuleMapFile(NestedModuleMapDirName, IsSystem,
378-
/*IsFramework*/false) == LMM_NewlyLoaded){
377+
if (parseModuleMapFile(NestedModuleMapDirName, IsSystem,
378+
/*IsFramework*/ false) == LMM_NewlyLoaded) {
379379
// If we just loaded a module map file, look for the module again.
380-
Module = ModMap.findModule(ModuleName);
380+
Module = ModMap.findOrLoadModule(ModuleName);
381381
if (Module)
382382
break;
383383
}
@@ -394,7 +394,7 @@ Module *HeaderSearch::lookupModule(StringRef ModuleName, StringRef SearchName,
394394
loadSubdirectoryModuleMaps(Dir);
395395

396396
// Look again for the module.
397-
Module = ModMap.findModule(ModuleName);
397+
Module = ModMap.findOrLoadModule(ModuleName);
398398
if (Module)
399399
break;
400400
}
@@ -1583,7 +1583,7 @@ bool HeaderSearch::hasModuleMap(StringRef FileName,
15831583
// Success. All of the directories we stepped through inherit this module
15841584
// map file.
15851585
for (unsigned I = 0, N = FixUpDirectories.size(); I != N; ++I)
1586-
DirectoryHasModuleMap[FixUpDirectories[I]] = true;
1586+
DirectoryHasModuleMap[FixUpDirectories[I]] = MMDS_Loaded;
15871587
return true;
15881588

15891589
case LMM_NoDirectory:
@@ -1801,6 +1801,33 @@ HeaderSearch::loadModuleMapFileImpl(FileEntryRef File, bool IsSystem,
18011801
return LMM_NewlyLoaded;
18021802
}
18031803

1804+
HeaderSearch::LoadModuleMapResult
1805+
HeaderSearch::parseModuleMapFileImpl(FileEntryRef File, bool IsSystem,
1806+
DirectoryEntryRef Dir, FileID ID) {
1807+
// Check whether we've already parsed this module map, and mark it as being
1808+
// parsed in case we recursively try to parse it from itself.
1809+
auto AddResult = PreParsedModuleMaps.insert(std::make_pair(File, true));
1810+
if (!AddResult.second)
1811+
return AddResult.first->second ? LMM_AlreadyLoaded : LMM_InvalidModuleMap;
1812+
1813+
if (ModMap.preParseModuleMapFile(File, IsSystem, Dir, ID)) {
1814+
PreParsedModuleMaps[File] = false;
1815+
return LMM_InvalidModuleMap;
1816+
}
1817+
1818+
// Try to load a corresponding private module map.
1819+
if (OptionalFileEntryRef PMMFile =
1820+
getPrivateModuleMap(File, FileMgr, Diags)) {
1821+
if (ModMap.preParseModuleMapFile(*PMMFile, IsSystem, Dir)) {
1822+
PreParsedModuleMaps[File] = false;
1823+
return LMM_InvalidModuleMap;
1824+
}
1825+
}
1826+
1827+
// This directory has a module map.
1828+
return LMM_NewlyLoaded;
1829+
}
1830+
18041831
OptionalFileEntryRef
18051832
HeaderSearch::lookupModuleMapFile(DirectoryEntryRef Dir, bool IsFramework) {
18061833
if (!HSOpts->ImplicitModuleMaps)
@@ -1853,7 +1880,7 @@ Module *HeaderSearch::loadFrameworkModule(StringRef Name, DirectoryEntryRef Dir,
18531880
break;
18541881
}
18551882

1856-
return ModMap.findModule(Name);
1883+
return ModMap.findOrLoadModule(Name);
18571884
}
18581885

18591886
HeaderSearch::LoadModuleMapResult
@@ -1869,8 +1896,16 @@ HeaderSearch::LoadModuleMapResult
18691896
HeaderSearch::loadModuleMapFile(DirectoryEntryRef Dir, bool IsSystem,
18701897
bool IsFramework) {
18711898
auto KnownDir = DirectoryHasModuleMap.find(Dir);
1872-
if (KnownDir != DirectoryHasModuleMap.end())
1873-
return KnownDir->second ? LMM_AlreadyLoaded : LMM_InvalidModuleMap;
1899+
if (KnownDir != DirectoryHasModuleMap.end()) {
1900+
switch (KnownDir->second) {
1901+
case MMDS_Parsed:
1902+
break;
1903+
case MMDS_Loaded:
1904+
return LMM_AlreadyLoaded;
1905+
case MMDS_Invalid:
1906+
return LMM_InvalidModuleMap;
1907+
};
1908+
}
18741909

18751910
if (OptionalFileEntryRef ModuleMapFile =
18761911
lookupModuleMapFile(Dir, IsFramework)) {
@@ -1880,9 +1915,49 @@ HeaderSearch::loadModuleMapFile(DirectoryEntryRef Dir, bool IsSystem,
18801915
// E.g. Foo.framework/Modules/module.modulemap
18811916
// ^Dir ^ModuleMapFile
18821917
if (Result == LMM_NewlyLoaded)
1883-
DirectoryHasModuleMap[Dir] = true;
1918+
DirectoryHasModuleMap[Dir] = MMDS_Loaded;
1919+
else if (Result == LMM_InvalidModuleMap)
1920+
DirectoryHasModuleMap[Dir] = MMDS_Invalid;
1921+
return Result;
1922+
}
1923+
return LMM_InvalidModuleMap;
1924+
}
1925+
1926+
HeaderSearch::LoadModuleMapResult
1927+
HeaderSearch::parseModuleMapFile(StringRef DirName, bool IsSystem,
1928+
bool IsFramework) {
1929+
if (auto Dir = FileMgr.getOptionalDirectoryRef(DirName))
1930+
return parseModuleMapFile(*Dir, IsSystem, IsFramework);
1931+
1932+
return LMM_NoDirectory;
1933+
}
1934+
1935+
HeaderSearch::LoadModuleMapResult
1936+
HeaderSearch::parseModuleMapFile(DirectoryEntryRef Dir, bool IsSystem,
1937+
bool IsFramework) {
1938+
// Check if this modulemap has already been fully loaded, if so skip it.
1939+
auto KnownDir = DirectoryHasModuleMap.find(Dir);
1940+
if (KnownDir != DirectoryHasModuleMap.end()) {
1941+
switch (KnownDir->second) {
1942+
case MMDS_Parsed:
1943+
case MMDS_Loaded:
1944+
return LMM_AlreadyLoaded;
1945+
case MMDS_Invalid:
1946+
return LMM_InvalidModuleMap;
1947+
};
1948+
}
1949+
1950+
if (OptionalFileEntryRef ModuleMapFile =
1951+
lookupModuleMapFile(Dir, IsFramework)) {
1952+
LoadModuleMapResult Result =
1953+
parseModuleMapFileImpl(*ModuleMapFile, IsSystem, Dir);
1954+
// Add Dir explicitly in case ModuleMapFile is in a subdirectory.
1955+
// E.g. Foo.framework/Modules/module.modulemap
1956+
// ^Dir ^ModuleMapFile
1957+
if (Result == LMM_NewlyLoaded)
1958+
DirectoryHasModuleMap[Dir] = MMDS_Parsed;
18841959
else if (Result == LMM_InvalidModuleMap)
1885-
DirectoryHasModuleMap[Dir] = false;
1960+
DirectoryHasModuleMap[Dir] = MMDS_Invalid;
18861961
return Result;
18871962
}
18881963
return LMM_InvalidModuleMap;

0 commit comments

Comments
 (0)