Skip to content

Commit 21f18f1

Browse files
authored
Optimization: cache results between TypeScript projects (#182)
* Optimization: cache results between projects Towards #175 Previously, scip-typescript didn't cache anything at all between TypeScript projects. This commit implements an optimization so that we now cache the results of loading source files and parsing options. Benchmarks against the sourcegraph/sourcegraph repo indicate this optimization consistently speeds up the `index` command in all three multi-project repositories that I tested it with. - sourcegraph/sourcegraph: ~30% from ~100s to ~70s - nextauthjs/next-auth: ~40% from 6.5s to 3.9s - xtermjs/xterm.js: ~45% from 7.3s to 4.1s For every repo, I additionally validated that the resulting index.scip has an identical checksum before and after applying this optimization. Given these promising results, this new optimization is enabled by default, but can be disabled with the option `--no-global-caches`. *Test plan* Manually tested by running `scip-typescript index tsconfig.all.json` in the sourcegraph/sourcegraph repository. To benchmark the difference for this PR: - Checkout the code - Run `yarn tsc -b` - Go to the directory of your project - Run `node PATH_TO_SCIP_TYPESCRIPT/dist/src/main.js` - Copy the "optimized" index.scip with `cp index.scip index-withcache.scip` - Run `node PATH_TO_SCIP_TYPESCRIPT/dist/src/main.js --no-global-caches` - Validate that the checksum is identical to that of the optimized output with `shasum -a 256 *.scip` * Fix failing tests * Address review comments
1 parent 9f5d08a commit 21f18f1

File tree

5 files changed

+145
-41
lines changed

5 files changed

+145
-41
lines changed

src/CommandLineOptions.test.ts

+2-1
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@ function checkIndexParser(
2424

2525
// defaults
2626
checkIndexParser([], {
27-
progressBar: true,
2827
cwd: process.cwd(),
2928
inferTsconfig: false,
3029
output: 'index.scip',
@@ -35,3 +34,5 @@ checkIndexParser(['--cwd', 'qux'], { cwd: 'qux' })
3534
checkIndexParser(['--yarn-workspaces'], { yarnWorkspaces: true })
3635
checkIndexParser(['--infer-tsconfig'], { inferTsconfig: true })
3736
checkIndexParser(['--no-progress-bar'], { progressBar: false })
37+
checkIndexParser(['--progress-bar'], { progressBar: true })
38+
checkIndexParser(['--no-global-caches'], { globalCaches: false })

src/CommandLineOptions.ts

+18-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
import { Command } from 'commander'
2+
// eslint-disable-next-line id-length
3+
import ts from 'typescript'
24

35
import packageJson from '../package.json'
46

@@ -10,6 +12,7 @@ export interface MultiProjectOptions {
1012
progressBar: boolean
1113
yarnWorkspaces: boolean
1214
yarnBerryWorkspaces: boolean
15+
globalCaches: boolean
1316
cwd: string
1417
output: string
1518
indexedProjects: Set<string>
@@ -22,6 +25,15 @@ export interface ProjectOptions extends MultiProjectOptions {
2225
writeIndex: (index: lsif.lib.codeintel.lsiftyped.Index) => void
2326
}
2427

28+
/** Cached values */
29+
export interface GlobalCache {
30+
sources: Map<
31+
string,
32+
[ts.SourceFile | undefined, ts.ScriptTarget | ts.CreateSourceFileOptions]
33+
>
34+
parsedCommandLines: Map<string, ts.ParsedCommandLine>
35+
}
36+
2537
export function mainCommand(
2638
indexAction: (projects: string[], otpions: MultiProjectOptions) => void
2739
): Command {
@@ -47,7 +59,12 @@ export function mainCommand(
4759
false
4860
)
4961
.option('--output <path>', 'path to the output file', 'index.scip')
50-
.option('--no-progress-bar', 'whether to disable the progress bar')
62+
.option('--progress-bar', 'whether to enable a rich progress bar')
63+
.option('--no-progress-bar', 'whether to disable the rich progress bar')
64+
.option(
65+
'--no-global-caches',
66+
'whether to disable global caches between TypeScript projects'
67+
)
5168
.argument('[projects...]')
5269
.action((parsedProjects, parsedOptions) => {
5370
indexAction(

src/ProjectIndexer.ts

+98-26
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,73 @@
11
import * as path from 'path'
2-
import { Writable as WritableStream } from 'stream'
32

43
import prettyMilliseconds from 'pretty-ms'
54
import ProgressBar from 'progress'
65
import * as ts from 'typescript'
76

8-
import { ProjectOptions } from './CommandLineOptions'
7+
import { GlobalCache, ProjectOptions } from './CommandLineOptions'
98
import { FileIndexer } from './FileIndexer'
109
import { Input } from './Input'
1110
import * as lsif from './lsif'
1211
import { LsifSymbol } from './LsifSymbol'
1312
import { Packages } from './Packages'
1413

14+
function createCompilerHost(
15+
cache: GlobalCache,
16+
compilerOptions: ts.CompilerOptions,
17+
projectOptions: ProjectOptions
18+
): ts.CompilerHost {
19+
const host = ts.createCompilerHost(compilerOptions)
20+
if (!projectOptions.globalCaches) {
21+
return host
22+
}
23+
const hostCopy = { ...host }
24+
host.getParsedCommandLine = (fileName: string) => {
25+
if (!hostCopy.getParsedCommandLine) {
26+
return undefined
27+
}
28+
const fromCache = cache.parsedCommandLines.get(fileName)
29+
if (fromCache !== undefined) {
30+
return fromCache
31+
}
32+
const result = hostCopy.getParsedCommandLine(fileName)
33+
if (result !== undefined) {
34+
// Don't cache undefined results even if they could be cached
35+
// theoretically. The big performance gains from this cache come from
36+
// caching non-undefined results.
37+
cache.parsedCommandLines.set(fileName, result)
38+
}
39+
return result
40+
}
41+
host.getSourceFile = (
42+
fileName,
43+
languageVersion,
44+
onError,
45+
shouldCreateNewSourceFile
46+
) => {
47+
const fromCache = cache.sources.get(fileName)
48+
if (fromCache !== undefined) {
49+
const [sourceFile, cachedLanguageVersion] = fromCache
50+
if (isSameLanguageVersion(languageVersion, cachedLanguageVersion)) {
51+
return sourceFile
52+
}
53+
}
54+
const result = hostCopy.getSourceFile(
55+
fileName,
56+
languageVersion,
57+
onError,
58+
shouldCreateNewSourceFile
59+
)
60+
if (result !== undefined) {
61+
// Don't cache undefined results even if they could be cached
62+
// theoretically. The big performance gains from this cache come from
63+
// caching non-undefined results.
64+
cache.sources.set(fileName, [result, languageVersion])
65+
}
66+
return result
67+
}
68+
return host
69+
}
70+
1571
export class ProjectIndexer {
1672
private options: ProjectOptions
1773
private program: ts.Program
@@ -20,10 +76,12 @@ export class ProjectIndexer {
2076
private packages: Packages
2177
constructor(
2278
public readonly config: ts.ParsedCommandLine,
23-
options: ProjectOptions
79+
options: ProjectOptions,
80+
cache: GlobalCache
2481
) {
2582
this.options = options
26-
this.program = ts.createProgram(config.fileNames, config.options)
83+
const host = createCompilerHost(cache, config.options, options)
84+
this.program = ts.createProgram(config.fileNames, config.options, host)
2785
this.checker = this.program.getTypeChecker()
2886
this.packages = new Packages(options.projectRoot)
2987
}
@@ -47,24 +105,24 @@ export class ProjectIndexer {
47105
)
48106
}
49107

50-
const jobs = new ProgressBar(
51-
` ${this.options.projectDisplayName} [:bar] :current/:total :title`,
52-
{
53-
total: filesToIndex.length,
54-
renderThrottle: 100,
55-
incomplete: '_',
56-
complete: '#',
57-
width: 20,
58-
clear: true,
59-
stream: this.options.progressBar
60-
? process.stderr
61-
: writableNoopStream(),
62-
}
63-
)
108+
const jobs: ProgressBar | undefined = !this.options.progressBar
109+
? undefined
110+
: new ProgressBar(
111+
` ${this.options.projectDisplayName} [:bar] :current/:total :title`,
112+
{
113+
total: filesToIndex.length,
114+
renderThrottle: 100,
115+
incomplete: '_',
116+
complete: '#',
117+
width: 20,
118+
clear: true,
119+
stream: process.stderr,
120+
}
121+
)
64122
let lastWrite = startTimestamp
65123
for (const [index, sourceFile] of filesToIndex.entries()) {
66124
const title = path.relative(this.options.cwd, sourceFile.fileName)
67-
jobs.tick({ title })
125+
jobs?.tick({ title })
68126
if (!this.options.progressBar) {
69127
const now = Date.now()
70128
const elapsed = now - lastWrite
@@ -102,7 +160,7 @@ export class ProjectIndexer {
102160
)
103161
}
104162
}
105-
jobs.terminate()
163+
jobs?.terminate()
106164
const elapsed = Date.now() - startTimestamp
107165
if (!this.options.progressBar && lastWrite > startTimestamp) {
108166
process.stdout.write('\n')
@@ -113,10 +171,24 @@ export class ProjectIndexer {
113171
}
114172
}
115173

116-
function writableNoopStream(): WritableStream {
117-
return new WritableStream({
118-
write(_unused1, _unused2, callback) {
119-
setImmediate(callback)
120-
},
121-
})
174+
function isSameLanguageVersion(
175+
a: ts.ScriptTarget | ts.CreateSourceFileOptions,
176+
b: ts.ScriptTarget | ts.CreateSourceFileOptions
177+
): boolean {
178+
if (typeof a === 'number' && typeof b === 'number') {
179+
return a === b
180+
}
181+
if (typeof a === 'number' || typeof b === 'number') {
182+
// Different shape: one is ts.ScriptTarget, the other is
183+
// ts.CreateSourceFileOptions
184+
return false
185+
}
186+
return (
187+
a.languageVersion === b.languageVersion &&
188+
a.impliedNodeFormat === b.impliedNodeFormat
189+
// Ignore setExternalModuleIndicator even if that increases the risk of a
190+
// false positive. A local experiment revealed that we never get a cache hit
191+
// if we compare setExternalModuleIndicator since it's a function with a
192+
// unique reference on every `CompilerHost.getSourceFile` callback.
193+
)
122194
}

src/main.test.ts

+1
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ for (const snapshotDirectory of snapshotDirectories) {
5454
yarnBerryWorkspaces: false,
5555
progressBar: false,
5656
indexedProjects: new Set(),
57+
globalCaches: true,
5758
})
5859
if (inferTsconfig) {
5960
fs.rmSync(tsconfigJsonPath)

src/main.ts

+26-13
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import * as ts from 'typescript'
99
import packageJson from '../package.json'
1010

1111
import {
12+
GlobalCache,
1213
mainCommand,
1314
MultiProjectOptions,
1415
ProjectOptions,
@@ -48,6 +49,11 @@ export function indexCommand(
4849
documentCount += index.documents.length
4950
fs.writeSync(output, index.serializeBinary())
5051
}
52+
53+
const cache: GlobalCache = {
54+
sources: new Map(),
55+
parsedCommandLines: new Map(),
56+
}
5157
try {
5258
writeIndex(
5359
new lsiftyped.Index({
@@ -67,12 +73,15 @@ export function indexCommand(
6773
// they can have dependencies.
6874
for (const projectRoot of projects) {
6975
const projectDisplayName = projectRoot === '.' ? options.cwd : projectRoot
70-
indexSingleProject({
71-
...options,
72-
projectRoot,
73-
projectDisplayName,
74-
writeIndex,
75-
})
76+
indexSingleProject(
77+
{
78+
...options,
79+
projectRoot,
80+
projectDisplayName,
81+
writeIndex,
82+
},
83+
cache
84+
)
7685
}
7786
} finally {
7887
fs.close(output)
@@ -96,10 +105,11 @@ function makeAbsolutePath(cwd: string, relativeOrAbsolutePath: string): string {
96105
return path.resolve(cwd, relativeOrAbsolutePath)
97106
}
98107

99-
function indexSingleProject(options: ProjectOptions): void {
108+
function indexSingleProject(options: ProjectOptions, cache: GlobalCache): void {
100109
if (options.indexedProjects.has(options.projectRoot)) {
101110
return
102111
}
112+
103113
options.indexedProjects.add(options.projectRoot)
104114
let config = ts.parseCommandLine(
105115
['-p', options.projectRoot],
@@ -125,15 +135,18 @@ function indexSingleProject(options: ProjectOptions): void {
125135
}
126136

127137
for (const projectReference of config.projectReferences || []) {
128-
indexSingleProject({
129-
...options,
130-
projectRoot: projectReference.path,
131-
projectDisplayName: projectReference.path,
132-
})
138+
indexSingleProject(
139+
{
140+
...options,
141+
projectRoot: projectReference.path,
142+
projectDisplayName: projectReference.path,
143+
},
144+
cache
145+
)
133146
}
134147

135148
if (config.fileNames.length > 0) {
136-
new ProjectIndexer(config, options).index()
149+
new ProjectIndexer(config, options, cache).index()
137150
}
138151
}
139152

0 commit comments

Comments
 (0)