Skip to content

Commit 16c0bea

Browse files
authored
Merge pull request #1004 from marcnjaramillo/fix-large-sarif-handling
Fix large sarif handling
2 parents 30d01cb + ad81127 commit 16c0bea

File tree

10 files changed

+305
-20
lines changed

10 files changed

+305
-20
lines changed

extensions/ql-vscode/CHANGELOG.md

+2-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,8 @@
22

33
## [UNRELEASED]
44

5-
- Fix the _CodeQL: Open Referenced File_ command for Windows systems. [#979](https://github.com/github/vscode-codeql/pull/979)
5+
- Fix the _CodeQL: Open Referenced File_ command for Windows systems. [#979](https://github.com/github/vscode-codeql/pull/979)
6+
- Support large SARIF results files (>4GB) without crashing VS Code. [#1004](https://github.com/github/vscode-codeql/pull/1004)
67
- Fix a bug that shows 'Set current database' when hovering over the currently selected database in the databases view. [#976](https://github.com/github/vscode-codeql/pull/976)
78
- Fix a bug with importing large databases. Databases over 4GB can now be imported directly from LGTM or from a zip file. This functionality is only available when using CodeQL CLI version 2.6.0 or later. [#971](https://github.com/github/vscode-codeql/pull/971)
89
- Replace certain control codes (`U+0000` - `U+001F`) with their corresponding control labels (`U+2400` - `U+241F`) in the results view. [#963](https://github.com/github/vscode-codeql/pull/963)

extensions/ql-vscode/package-lock.json

+96
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

extensions/ql-vscode/package.json

+5
Original file line numberDiff line numberDiff line change
@@ -992,6 +992,9 @@
992992
"react": "^16.8.6",
993993
"react-dom": "^16.8.6",
994994
"semver": "~7.3.2",
995+
"stream": "^0.0.2",
996+
"stream-chain": "~2.2.4",
997+
"stream-json": "~1.7.3",
995998
"tmp": "^0.1.0",
996999
"tmp-promise": "~3.0.2",
9971000
"tree-kill": "~1.2.2",
@@ -1026,6 +1029,8 @@
10261029
"@types/semver": "~7.2.0",
10271030
"@types/sinon": "~7.5.2",
10281031
"@types/sinon-chai": "~3.2.3",
1032+
"@types/stream-chain": "~2.0.1",
1033+
"@types/stream-json": "~1.7.1",
10291034
"@types/through2": "^2.0.36",
10301035
"@types/tmp": "^0.1.0",
10311036
"@types/unzipper": "~0.10.1",

extensions/ql-vscode/src/cli.ts

+3-18
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
import * as cpp from 'child-process-promise';
22
import * as child_process from 'child_process';
3-
import * as fs from 'fs-extra';
43
import * as path from 'path';
54
import * as sarif from 'sarif';
65
import { SemVer } from 'semver';
@@ -17,6 +16,7 @@ import { assertNever } from './pure/helpers-pure';
1716
import { QueryMetadata, SortDirection } from './pure/interface-types';
1817
import { Logger, ProgressReporter } from './logging';
1918
import { CompilationMessage } from './pure/messages';
19+
import { sarifParser } from './sarif-parser';
2020
import { dbSchemeToLanguage } from './helpers';
2121

2222
/**
@@ -696,22 +696,7 @@ export class CodeQLCliServer implements Disposable {
696696

697697
async interpretBqrs(metadata: QueryMetadata, resultsPath: string, interpretedResultsPath: string, sourceInfo?: SourceInfo): Promise<sarif.Log> {
698698
await this.runInterpretCommand(SARIF_FORMAT, metadata, resultsPath, interpretedResultsPath, sourceInfo);
699-
700-
let output: string;
701-
try {
702-
output = await fs.readFile(interpretedResultsPath, 'utf8');
703-
} catch (e) {
704-
const rawMessage = e.stderr || e.message;
705-
const errorMessage = rawMessage.startsWith('Cannot create a string')
706-
? `SARIF too large. ${rawMessage}`
707-
: rawMessage;
708-
throw new Error(`Reading output of interpretation failed: ${errorMessage}`);
709-
}
710-
try {
711-
return JSON.parse(output) as sarif.Log;
712-
} catch (err) {
713-
throw new Error(`Parsing output of interpretation failed: ${err.stderr || err}`);
714-
}
699+
return await sarifParser(interpretedResultsPath);
715700
}
716701

717702
async generateResultsCsv(metadata: QueryMetadata, resultsPath: string, csvPath: string, sourceInfo?: SourceInfo): Promise<void> {
@@ -1157,7 +1142,7 @@ export class CliVersionConstraint {
11571142

11581143
/**
11591144
* CLI version where database registration was introduced
1160-
*/
1145+
*/
11611146
public static CLI_VERSION_WITH_DB_REGISTRATION = new SemVer('2.4.1');
11621147

11631148
/**
+48
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
import * as Sarif from 'sarif';
2+
import * as fs from 'fs-extra';
3+
import { parser } from 'stream-json';
4+
import { pick } from 'stream-json/filters/Pick';
5+
import Assembler = require('stream-json/Assembler');
6+
import { chain } from 'stream-chain';
7+
8+
// Placeholder `tool` entry for the single run in the Log that `sarifParser` builds.
// Only the `runs.0.results` array is read from the SARIF file, so the tool
// metadata stored in the file is not carried over into the returned Log.
const DUMMY_TOOL : Sarif.Tool = {driver: {name: ''}};
9+
10+
/**
 * Streams a SARIF file from disk and returns a minimal `Sarif.Log` containing
 * only the results of the first run (`runs[0].results`).
 *
 * The file is tokenized as a stream rather than read into a single string, so
 * the whole document never has to be held in memory as text. Everything other
 * than the first run's results (tool metadata, artifacts, later runs) is
 * discarded; the returned run uses `DUMMY_TOOL` as its `tool`.
 *
 * @param interpretedResultsPath Path of the SARIF file to read.
 * @returns A Log with `version` '2.1.0' and one run holding the results that
 *   were found, or an empty results array when the file has no
 *   `runs[0].results`.
 * @throws Error prefixed with "Parsing output of interpretation failed:" when
 *   reading or parsing the file fails.
 */
export async function sarifParser(interpretedResultsPath: string) : Promise<Sarif.Log> {
  try {
    // Tokenize the SARIF file and keep only the tokens under `runs.0.results`.
    const pipeline = chain([
      fs.createReadStream(interpretedResultsPath),
      parser(),
      pick({filter: 'runs.0.results'})
    ]);

    // Rebuilds JavaScript values from the filtered token stream.
    const assembler = Assembler.connectTo(pipeline);

    return await new Promise<Sarif.Log>((resolve, reject) => {
      // Wraps a results array in the minimal Log shape handed back to callers.
      const toLog = (results: Sarif.Result[]): Sarif.Log => ({
        version: '2.1.0',
        runs: [
          {
            tool: DUMMY_TOOL,
            results
          }
        ]
      });

      // Any read or parse failure in the pipeline rejects the promise.
      pipeline.on('error', (error) => {
        reject(error);
      });

      // Fired once the picked value has been fully assembled.
      assembler.on('done', (done) => {
        resolve(toLog(done.current ?? []));
      });

      // If the file has no `runs.0.results`, the filter emits no tokens and the
      // assembler never reports 'done'. Without this handler the promise would
      // never settle. When 'done' already fired, this second resolve is a no-op.
      pipeline.on('end', () => {
        resolve(toLog([]));
      });
    });
  } catch (err) {
    throw new Error(`Parsing output of interpretation failed: ${err.stderr || err}`);
  }
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
{
2+
"version": "2.1.0",
3+
"$schema": "http://json.schemastore.org/sarif-2.1.0-rtm.4",
4+
"runs": [
5+
{
6+
"tool": {
7+
"driver": {
8+
"name": "ESLint",
9+
"informationUri": "https://eslint.org",
10+
"rules": [
11+
{
12+
"id": "no-unused-vars",
13+
"shortDescription": {
14+
"text": "disallow unused variables"
15+
},
16+
"helpUri": "https://eslint.org/docs/rules/no-unused-vars",
17+
"properties": {
18+
"category": "Variables"
19+
}
20+
}
21+
]
22+
}
23+
},
24+
"artifacts": [
25+
{
26+
"location": {
27+
"uri": "file:///C:/dev/sarif/sarif-tutorials/samples/Introduction/simple-example.js"
28+
}
29+
}
30+
],
31+
"results": []
32+
}
33+
]
34+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
{
2+
"version": "2.1.0",
3+
"$schema": "http://json.schemastore.org/sarif-2.1.0-rtm.4",
4+
"runs": [
5+
{
6+
"tool": {
7+
"driver": {
8+
"name": "ESLint",
9+
"informationUri": "https://eslint.org",
10+
"rules": [
11+
{
12+
"id": "no-unused-vars",
13+
"shortDescription": {
14+
"text": "disallow unused variables"
15+
},
16+
"helpUri": "https://eslint.org/docs/rules/no-unused-vars",
17+
"properties": {
18+
"category": "Variables"
19+
}
20+
}
21+
]
22+
}
23+
},
24+
"artifacts": [
25+
{
26+
"location": {
27+
"uri": "file:///C:/dev/sarif/sarif-tutorials/samples/Introduction/simple-example.js"
28+
}
29+
}
30+
]
31+
}
32+
]
33+
}

0 commit comments

Comments
 (0)