Skip to content

Commit 74b92b0

Browse files
[IncludeTree] IncludeTreeFileList optimizations
Optimize IncludeTreeFileList to make creation and iterating to have less overhead and easier to use: * Unify the conflict file detection code with the code that uniquing the file from nested FileList so there is no need to use separate data structures. * Extend the conflict file content detection to iterating method `forEachFile` so it is easier to discover the problem with no overhead since no extra data structure is needed. * Improve the IncludeTreeFileSystem constructor to share more functions between two versions. * Remove an unnecessary requirement that IncludeTree::FileList must contain FileEntries. It can have an empty file entries list but provides nesting FileList accesses.
1 parent 2be6f8f commit 74b92b0

File tree

3 files changed

+81
-57
lines changed

3 files changed

+81
-57
lines changed

clang/include/clang/CAS/IncludeTree.h

+4-4
Original file line numberDiff line numberDiff line change
@@ -313,7 +313,7 @@ class IncludeTree::FileList : public IncludeTreeBase<FileList> {
313313
FileSizeTy getFileSize(size_t I) const;
314314

315315
llvm::Error
316-
forEachFileImpl(llvm::DenseSet<ObjectRef> &Seen,
316+
forEachFileImpl(llvm::DenseMap<ObjectRef, ObjectRef> &Seen,
317317
llvm::function_ref<llvm::Error(File, FileSizeTy)> Callback);
318318

319319
static bool isValid(const ObjectProxy &Node);
@@ -864,16 +864,16 @@ class IncludeTreeRoot : public IncludeTreeBase<IncludeTreeRoot> {
864864

865865
/// An implementation of a \p vfs::FileSystem that supports the simple queries
866866
/// of the preprocessor, for creating \p FileEntries using a file path, while
867-
/// "replaying" an \p IncludeTreeRoot. It is not intended to be a complete
867+
/// "replaying" an \p IncludeTree::FileList. It is not intended to be a complete
868868
/// implementation of a file system.
869869
Expected<IntrusiveRefCntPtr<llvm::vfs::FileSystem>>
870-
createIncludeTreeFileSystem(IncludeTreeRoot &Root);
870+
createIncludeTreeFileSystem(IncludeTree::FileList &List);
871871

872872
/// Create the same IncludeTreeFileSystem but from
873873
/// ArrayRef<IncludeTree::FileEntry>.
874874
Expected<IntrusiveRefCntPtr<llvm::vfs::FileSystem>> createIncludeTreeFileSystem(
875875
llvm::cas::ObjectStore &CAS,
876-
llvm::ArrayRef<IncludeTree::FileList::FileEntry> List);
876+
llvm::ArrayRef<IncludeTree::FileList::FileEntry> Files);
877877

878878
} // namespace cas
879879
} // namespace clang

clang/lib/CAS/IncludeTree.cpp

+72-52
Original file line numberDiff line numberDiff line change
@@ -246,24 +246,49 @@ IncludeTree::FileList::getFileSize(size_t I) const {
246246
Data.data() + I * sizeof(FileSizeTy));
247247
}
248248

249+
static llvm::Error diagnoseFileChange(IncludeTree::File F, ObjectRef Content) {
250+
auto FilenameBlob = F.getFilename();
251+
if (!FilenameBlob)
252+
return FilenameBlob.takeError();
253+
cas::ObjectStore &DB = F.getCAS();
254+
std::string Filename(FilenameBlob->getData());
255+
std::string OldID = DB.getID(Content).toString();
256+
std::string NewID = DB.getID(F.getContentsRef()).toString();
257+
return llvm::createStringError(llvm::inconvertibleErrorCode(),
258+
"file '%s' changed during build; include-tree "
259+
"contents changed from %s to %s",
260+
Filename.c_str(), OldID.c_str(),
261+
NewID.c_str());
262+
}
263+
249264
llvm::Error IncludeTree::FileList::forEachFileImpl(
250-
llvm::DenseSet<ObjectRef> &Seen,
265+
llvm::DenseMap<ObjectRef, ObjectRef> &Seen,
251266
llvm::function_ref<llvm::Error(File, FileSizeTy)> Callback) {
252267
size_t Next = 0;
253268
size_t FileCount = getNumFilesCurrentList();
269+
254270
return forEachReference([&](ObjectRef Ref) -> llvm::Error {
255271
size_t Index = Next++;
256-
if (!Seen.insert(Ref).second)
257-
return llvm::Error::success();
258-
259272
if (Index < FileCount) {
260273
auto Include = File::get(getCAS(), Ref);
261274
if (!Include)
262275
return Include.takeError();
276+
auto Inserted = Seen.try_emplace(Ref, Include->getContentsRef());
277+
if (!Inserted.second) {
278+
if (Inserted.first->second != Include->getContentsRef())
279+
return diagnoseFileChange(*Include, Inserted.first->second);
280+
return llvm::Error::success();
281+
}
263282
return Callback(std::move(*Include), getFileSize(Index));
264283
}
265284

266285
// Otherwise, it's a chained FileList.
286+
// Use a value to itself to indicate if the filelist node has been
287+
// visited or not.
288+
auto Inserted = Seen.try_emplace(Ref, Ref);
289+
if (!Inserted.second)
290+
return llvm::Error::success();
291+
267292
auto Proxy = getCAS().getProxy(Ref);
268293
if (!Proxy)
269294
return Proxy.takeError();
@@ -274,7 +299,7 @@ llvm::Error IncludeTree::FileList::forEachFileImpl(
274299

275300
llvm::Error IncludeTree::FileList::forEachFile(
276301
llvm::function_ref<llvm::Error(File, FileSizeTy)> Callback) {
277-
llvm::DenseSet<ObjectRef> Seen;
302+
llvm::DenseMap<ObjectRef, ObjectRef> Seen;
278303
return forEachFileImpl(Seen, Callback);
279304
}
280305

@@ -321,7 +346,7 @@ bool IncludeTree::FileList::isValid(const ObjectProxy &Node) {
321346
return false;
322347
unsigned NumFiles =
323348
llvm::support::endian::read<uint32_t, llvm::endianness::little>(Data.data());
324-
return NumFiles != 0 && NumFiles <= Base.getNumReferences() &&
349+
return NumFiles <= Base.getNumReferences() &&
325350
Data.size() == sizeof(uint32_t) + NumFiles * sizeof(FileSizeTy);
326351
}
327352

@@ -1015,38 +1040,19 @@ class IncludeTreeFileSystem : public llvm::vfs::FileSystem {
10151040
};
10161041
} // namespace
10171042

1018-
static llvm::Error diagnoseFileChange(IncludeTree::File F, ObjectRef Content) {
1019-
auto FilenameBlob = F.getFilename();
1020-
if (!FilenameBlob)
1021-
return FilenameBlob.takeError();
1022-
cas::ObjectStore &DB = F.getCAS();
1023-
std::string Filename(FilenameBlob->getData());
1024-
std::string OldID = DB.getID(Content).toString();
1025-
std::string NewID = DB.getID(F.getContentsRef()).toString();
1026-
return llvm::createStringError(llvm::inconvertibleErrorCode(),
1027-
"file '%s' changed during build; include-tree "
1028-
"contents changed from %s to %s",
1029-
Filename.c_str(), OldID.c_str(),
1030-
NewID.c_str());
1031-
}
1032-
1033-
Expected<IntrusiveRefCntPtr<llvm::vfs::FileSystem>>
1034-
cas::createIncludeTreeFileSystem(IncludeTreeRoot &Root) {
1035-
auto FileList = Root.getFileList();
1036-
if (!FileList)
1037-
return FileList.takeError();
1038-
1039-
std::vector<IncludeTree::FileList::FileEntry> Files;
1040-
Files.reserve(FileList->getNumReferences());
1041-
1042-
if (auto Err = FileList->forEachFile(
1043-
[&](IncludeTree::File File, IncludeTree::FileList::FileSizeTy Size) {
1044-
Files.push_back({File.getRef(), Size});
1045-
return llvm::Error::success();
1046-
}))
1047-
return std::move(Err);
1048-
1049-
return createIncludeTreeFileSystem(Root.getCAS(), Files);
1043+
static std::string computeFilename(StringRef Name, IncludeTreeFileSystem &FS) {
1044+
SmallString<128> Filename(Name);
1045+
assert(Filename != ".");
1046+
llvm::sys::path::remove_dots(Filename);
1047+
1048+
StringRef DirName = llvm::sys::path::parent_path(Filename);
1049+
if (DirName.empty())
1050+
DirName = ".";
1051+
auto &DirEntry = FS.Directories[DirName];
1052+
if (DirEntry == llvm::sys::fs::UniqueID()) {
1053+
DirEntry = llvm::vfs::getNextVirtualUniqueID();
1054+
}
1055+
return Filename.str().str();
10501056
}
10511057

10521058
Expected<IntrusiveRefCntPtr<llvm::vfs::FileSystem>>
@@ -1077,20 +1083,7 @@ cas::createIncludeTreeFileSystem(
10771083
if (!FilenameBlob)
10781084
return FilenameBlob.takeError();
10791085

1080-
SmallString<128> Filename(FilenameBlob->getData());
1081-
// Strip './' in the filename to match the behaviour of ASTWriter; we
1082-
// also strip './' in IncludeTreeFileSystem::getPath.
1083-
assert(Filename != ".");
1084-
llvm::sys::path::remove_dots(Filename);
1085-
1086-
StringRef DirName = llvm::sys::path::parent_path(Filename);
1087-
if (DirName.empty())
1088-
DirName = ".";
1089-
auto &DirEntry = IncludeTreeFS->Directories[DirName];
1090-
if (DirEntry == llvm::sys::fs::UniqueID()) {
1091-
DirEntry = llvm::vfs::getNextVirtualUniqueID();
1092-
}
1093-
1086+
auto Filename = computeFilename(FilenameBlob->getData(), *IncludeTreeFS);
10941087
IncludeTreeFS->Files.insert(std::make_pair(
10951088
Filename,
10961089
IncludeTreeFileSystem::FileEntry{File->getContentsRef(), Entry.Size,
@@ -1099,3 +1092,30 @@ cas::createIncludeTreeFileSystem(
10991092

11001093
return IncludeTreeFS;
11011094
}
1095+
1096+
Expected<IntrusiveRefCntPtr<llvm::vfs::FileSystem>>
1097+
cas::createIncludeTreeFileSystem(IncludeTree::FileList &List) {
1098+
auto &CAS = List.getCAS();
1099+
IntrusiveRefCntPtr<IncludeTreeFileSystem> IncludeTreeFS =
1100+
new IncludeTreeFileSystem(CAS);
1101+
1102+
auto E = List.forEachFile(
1103+
[&](IncludeTree::File File,
1104+
IncludeTree::FileList::FileSizeTy Size) -> llvm::Error {
1105+
auto FilenameBlob = File.getFilename();
1106+
if (!FilenameBlob)
1107+
return FilenameBlob.takeError();
1108+
auto Filename =
1109+
computeFilename(FilenameBlob->getData(), *IncludeTreeFS);
1110+
IncludeTreeFS->Files.insert(
1111+
std::make_pair(Filename, IncludeTreeFileSystem::FileEntry{
1112+
File.getContentsRef(), Size,
1113+
llvm::vfs::getNextVirtualUniqueID()}));
1114+
return llvm::Error::success();
1115+
});
1116+
1117+
if (E)
1118+
return std::move(E);
1119+
1120+
return IncludeTreeFS;
1121+
}

clang/lib/Frontend/CompilerInvocation.cpp

+5-1
Original file line numberDiff line numberDiff line change
@@ -1611,7 +1611,11 @@ createBaseFS(const FileSystemOptions &FSOpts, const FrontendOptions &FEOpts,
16111611
auto Root = cas::IncludeTreeRoot::get(*CAS, *Ref);
16121612
if (!Root)
16131613
return Root.takeError();
1614-
return cas::createIncludeTreeFileSystem(*Root);
1614+
auto FileList = Root->getFileList();
1615+
if (!FileList)
1616+
return FileList.takeError();
1617+
1618+
return cas::createIncludeTreeFileSystem(*FileList);
16151619
};
16161620
auto makeCASFS = [&](std::shared_ptr<llvm::cas::ObjectStore> CAS,
16171621
llvm::cas::CASID &ID)

0 commit comments

Comments
 (0)