Skip to content

lintcheck: parallelize #6764

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Feb 20, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion clippy_dev/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,9 @@ shell-escape = "0.1"
tar = { version = "0.4.30", optional = true }
toml = { version = "0.5", optional = true }
ureq = { version = "2.0.0-rc3", optional = true }
rayon = { version = "1.5.0", optional = true }
walkdir = "2"

[features]
lintcheck = ["flate2", "serde_json", "tar", "toml", "ureq", "serde", "fs_extra"]
lintcheck = ["flate2", "serde_json", "tar", "toml", "ureq", "serde", "fs_extra", "rayon"]
deny-warnings = []
89 changes: 76 additions & 13 deletions clippy_dev/src/lintcheck.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,11 @@ use crate::clippy_project_root;

use std::collections::HashMap;
use std::process::Command;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::{env, fmt, fs::write, path::PathBuf};

use clap::ArgMatches;
use rayon::prelude::*;
use serde::{Deserialize, Serialize};
use serde_json::Value;

Expand All @@ -37,7 +39,7 @@ struct TomlCrate {

/// Represents an archive we download from crates.io, or a git repo, or a local repo/folder
/// Once processed (downloaded/extracted/cloned/copied...), this will be translated into a `Crate`
#[derive(Debug, Serialize, Deserialize, Eq, Hash, PartialEq)]
#[derive(Debug, Serialize, Deserialize, Eq, Hash, PartialEq, Ord, PartialOrd)]
enum CrateSource {
CratesIo {
name: String,
Expand Down Expand Up @@ -215,11 +217,34 @@ impl CrateSource {
impl Crate {
/// Run `cargo clippy` on the `Crate` and collect and return all the lint warnings that clippy
/// issued
fn run_clippy_lints(&self, cargo_clippy_path: &PathBuf) -> Vec<ClippyWarning> {
println!("Linting {} {}...", &self.name, &self.version);
fn run_clippy_lints(
&self,
cargo_clippy_path: &PathBuf,
target_dir_index: &AtomicUsize,
thread_limit: usize,
total_crates_to_lint: usize,
) -> Vec<ClippyWarning> {
// advance the atomic index by one
let index = target_dir_index.fetch_add(1, Ordering::SeqCst);
// "loop" the index within 0..thread_limit
let target_dir_index = index % thread_limit;
let perc = ((index * 100) as f32 / total_crates_to_lint as f32) as u8;

if thread_limit == 1 {
println!(
"{}/{} {}% Linting {} {}",
index, total_crates_to_lint, perc, &self.name, &self.version
);
} else {
println!(
"{}/{} {}% Linting {} {} in target dir {:?}",
index, total_crates_to_lint, perc, &self.name, &self.version, target_dir_index
);
}

let cargo_clippy_path = std::fs::canonicalize(cargo_clippy_path).unwrap();

let shared_target_dir = clippy_project_root().join("target/lintcheck/shared_target_dir/");
let shared_target_dir = clippy_project_root().join("target/lintcheck/shared_target_dir");

let mut args = vec!["--", "--message-format=json", "--", "--cap-lints=warn"];

Expand All @@ -232,7 +257,11 @@ impl Crate {
}

let all_output = std::process::Command::new(&cargo_clippy_path)
.env("CARGO_TARGET_DIR", shared_target_dir)
// use the looping index to create individual target dirs
.env(
"CARGO_TARGET_DIR",
shared_target_dir.join(format!("_{:?}", target_dir_index)),
)
// lint warnings will look like this:
// src/cargo/ops/cargo_compile.rs:127:35: warning: usage of `FromIterator::from_iter`
.args(&args)
Expand Down Expand Up @@ -283,13 +312,13 @@ fn filter_clippy_warnings(line: &str) -> bool {

/// Builds clippy inside the repo to make sure we have a clippy executable we can use.
fn build_clippy() {
let output = Command::new("cargo")
let status = Command::new("cargo")
.arg("build")
.output()
.status()
.expect("Failed to build clippy!");
if !output.status.success() {
eprintln!("Failed to compile Clippy");
eprintln!("stderr: {}", String::from_utf8_lossy(&output.stderr))
if !status.success() {
eprintln!("Error: Failed to compile Clippy!");
std::process::exit(1);
}
}

Expand Down Expand Up @@ -356,6 +385,9 @@ fn read_crates(toml_path: Option<&str>) -> (String, Vec<CrateSource>) {
unreachable!("Failed to translate TomlCrate into CrateSource!");
}
});
// sort the crates
crate_sources.sort();

(toml_filename, crate_sources)
}

Expand Down Expand Up @@ -454,15 +486,46 @@ pub fn run(clap_config: &ArgMatches) {
.into_iter()
.map(|krate| krate.download_and_extract())
.filter(|krate| krate.name == only_one_crate)
.map(|krate| krate.run_clippy_lints(&cargo_clippy_path))
.map(|krate| krate.run_clippy_lints(&cargo_clippy_path, &AtomicUsize::new(0), 1, 1))
.flatten()
.collect()
} else {
let counter = std::sync::atomic::AtomicUsize::new(0);

// Ask rayon for thread count. Assume that half of that is the number of physical cores
// Use one target dir for each core so that we can run N clippys in parallel.
// We need to use different target dirs because cargo would lock them for a single build otherwise,
// killing the parallelism. However this also means that deps will only be reused half/a
// quarter of the time which might result in a longer wall clock runtime

// This helps when we check many small crates with dep-trees that don't have a lot of branches in
// order to achive some kind of parallelism

// by default, use a single thread
let num_cpus = match clap_config.value_of("threads") {
Some(threads) => {
let threads: usize = threads
.parse()
.expect(&format!("Failed to parse '{}' to a digit", threads));
if threads == 0 {
// automatic choice
// Rayon seems to return thread count so half that for core count
(rayon::current_num_threads() / 2) as usize
} else {
threads
}
},
// no -j passed, use a single thread
None => 1,
};

let num_crates = crates.len();

// check all crates (default)
crates
.into_iter()
.into_par_iter()
.map(|krate| krate.download_and_extract())
.map(|krate| krate.run_clippy_lints(&cargo_clippy_path))
.map(|krate| krate.run_clippy_lints(&cargo_clippy_path, &counter, num_cpus, num_crates))
.flatten()
.collect()
};
Expand Down
8 changes: 8 additions & 0 deletions clippy_dev/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,14 @@ fn get_clap_config<'a>() -> ArgMatches<'a> {
.value_name("CRATES-SOURCES-TOML-PATH")
.long("crates-toml")
.help("set the path for a crates.toml where lintcheck should read the sources from"),
)
.arg(
Arg::with_name("threads")
.takes_value(true)
.value_name("N")
.short("j")
.long("jobs")
.help("number of threads to use, 0 automatic choice"),
);

let app = App::new("Clippy developer tooling")
Expand Down
2 changes: 1 addition & 1 deletion lintcheck-logs/lintcheck_crates_logs.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
clippy 0.1.52 (bed115d55 2021-02-15)
clippy 0.1.52 (bb5f9d18a 2021-02-19)

cargo-0.49.0/build.rs:1:null clippy::cargo_common_metadata "package `cargo` is missing `package.categories` metadata"
cargo-0.49.0/build.rs:1:null clippy::cargo_common_metadata "package `cargo` is missing `package.keywords` metadata"
Expand Down