Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add critical path scheduler to improve build times #2019

Closed
wants to merge 20 commits into from
Closed
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
172 changes: 165 additions & 7 deletions src/build.cc
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,16 @@ bool DryRunCommandRunner::WaitForCommand(Result* result) {

} // namespace


bool EdgeQueue::EdgePriorityCompare::operator()(const Edge* e1, const Edge* e2) const {

This comment was marked as abuse.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

graph.h which defines Edge isn't included in the header.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You could template it. (And if you really mean for it to be just Edge*, you can static_assert that the template is a forward-declared type. As in

class Edge; // Fwd declare
...
    template <typename EdgeType>
    [[nodiscard]] bool operator()(const EdgeType* e1, const EdgeType* e2) const {
        static_assert(std::is_same_v<EdgeType, Edge>); // Avoiding including `graph.h`.
        ...

const int64_t ct1 = e1->critical_time();
const int64_t ct2 = e2->critical_time();
if (ct1 != ct2) {
return ct1 < ct2;
}
return e1->id_ < e2->id_;
}

Plan::Plan(Builder* builder)
: builder_(builder)
, command_edges_(0)
Expand All @@ -89,6 +99,7 @@ void Plan::Reset() {
}

bool Plan::AddTarget(const Node* target, string* err) {
targets_.push_back(target);
return AddSubTarget(target, NULL, err, NULL);
}

Expand Down Expand Up @@ -151,10 +162,7 @@ void Plan::EdgeWanted(const Edge* edge) {
Edge* Plan::FindWork() {
if (ready_.empty())
return NULL;
EdgeSet::iterator e = ready_.begin();
Edge* edge = *e;
ready_.erase(e);
return edge;
return ready_.pop();
}

void Plan::ScheduleWork(map<Edge*, Want>::iterator want_e) {
Expand All @@ -172,10 +180,12 @@ void Plan::ScheduleWork(map<Edge*, Want>::iterator want_e) {
Pool* pool = edge->pool();
if (pool->ShouldDelayEdge()) {
pool->DelayEdge(edge);
pool->RetrieveReadyEdges(&ready_);
EdgeSet new_edges;
pool->RetrieveReadyEdges(&new_edges);
ready_.push(new_edges.begin(), new_edges.end());
} else {
pool->EdgeScheduled(*edge);
ready_.insert(edge);
ready_.push(edge);
}
}

Expand All @@ -187,7 +197,9 @@ bool Plan::EdgeFinished(Edge* edge, EdgeResult result, string* err) {
// See if this job frees up any delayed jobs.
if (directly_wanted)
edge->pool()->EdgeFinished(*edge);
edge->pool()->RetrieveReadyEdges(&ready_);
EdgeSet new_edges;
edge->pool()->RetrieveReadyEdges(&new_edges);
ready_.push(new_edges.begin(), new_edges.end());

// The rest of this function only applies to successful commands.
if (result != kEdgeSucceeded)
Expand Down Expand Up @@ -424,6 +436,150 @@ void Plan::UnmarkDependents(const Node* node, set<Node*>* dependents) {
}
}

namespace {

template <typename T>
struct SeenBefore {
std::set<const T*>* seen_;

SeenBefore(std::set<const T*>* seen) : seen_(seen) {}

bool operator() (const T* item) {
// Return true if the item has been seen before
return !seen_->insert(item).second;
}
};

// Assign run_time_ms_ for all wanted edges, and returns total time for all edges
// For phony edges, 0 cost.
// For edges with a build history, use the last build time.
// For edges without history, use the 75th percentile time for edges with history.
// Or, if there is no history at all just use 1
int64_t AssignEdgeRuntime(BuildLog* build_log,
const std::map<Edge*, Plan::Want>& want) {
bool missing_durations = false;
std::vector<int64_t> durations;
int64_t total_time = 0;

for (std::map<Edge*, Plan::Want>::const_iterator it = want.begin(),
end = want.end();
it != end; ++it) {
Edge* edge = it->first;
if (edge->is_phony()) {
continue;
}
BuildLog::LogEntry* entry =
build_log->LookupByOutput(edge->outputs_[0]->path());
if (!entry) {
missing_durations = true;
edge->run_time_ms_ = -1; // -1 to mark as needing filled in
continue;
}
const int64_t duration = entry->end_time - entry->start_time;
edge->run_time_ms_ = duration;
total_time += duration;
durations.push_back(duration);
}

if (!missing_durations) {
return total_time;
}

// Heuristic: for unknown edges, take the 75th percentile time.
// This allows the known-slowest jobs to run first, but isn't so
// small that it is always the lowest priority. Which for slow jobs,
// might bottleneck the build.
int64_t p75_time = 1;
int64_t num_durations = static_cast<int64_t>(durations.size());
if (num_durations > 0) {
size_t p75_idx = (num_durations - 1) - num_durations / 4;
std::vector<int64_t>::iterator p75_it = durations.begin() + p75_idx;
std::nth_element(durations.begin(), p75_it, durations.end());
p75_time = *p75_it;
}

for (std::map<Edge*, Plan::Want>::const_iterator it = want.begin(),
end = want.end();
it != end; ++it) {
Edge* edge = it->first;
if (edge->run_time_ms_ >= 0) {
continue;
}
edge->run_time_ms_ = p75_time;
total_time += p75_time;
}
return total_time;
}

} // namespace

void Plan::ComputeCriticalTime(BuildLog* build_log) {
// testcases have no build_log
if (!build_log)
return;

METRIC_RECORD("ComputePriorityList");
// Remove duplicate targets
{
std::set<const Node*> seen;
SeenBefore<Node> seen_before(&seen);
targets_.erase(std::remove_if(targets_.begin(), targets_.end(), seen_before),
targets_.end());
}

// total time if building all edges in serial. This value is big
// enough to ensure higher priority target's initial critical time
// is always bigger than lower ones
int64_t total_time = AssignEdgeRuntime(build_log, want_);

// Use backflow algorithm to compute critical times for all nodes, starting
// from the destination nodes.
// XXX: ignores pools
std::queue<Edge*> breadthFirstEdges; // Queue, for breadth-first traversal
std::set<const Edge*> active_edges; // Set of in breadthFirstEdges
SeenBefore<Edge> seen_edge(
&active_edges); // Test for uniqueness in breadthFirstEdges

for (std::vector<const Node*>::reverse_iterator it = targets_.rbegin(),
end = targets_.rend();
it != end; ++it) {
if (Edge* in = (*it)->in_edge()) {
// Use initial critical time: total_time * N. This means higher
// priority targets always get a higher critical time value
int64_t priority_weight = (it - targets_.rbegin()) * total_time;
in->set_critical_time(
priority_weight +
std::max<int64_t>(in->run_time_ms_, in->critical_time()));
if (!seen_edge(in)) {
breadthFirstEdges.push(in);
}
}
}

while (!breadthFirstEdges.empty()) {
Edge* e = breadthFirstEdges.front();
breadthFirstEdges.pop();
active_edges.erase(e);

for (std::vector<Node*>::iterator it = e->inputs_.begin(),
end = e->inputs_.end();

This comment was marked as abuse.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I assumed C++11 isn't required yet from the code style used elsewhere. Is that correct?

This comment was marked as abuse.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also means no std::chrono::duration<int64_t, std::milli> :-(

it != end; ++it) {
Edge* in = (*it)->in_edge();
if (!in) {
continue;
}
// Only process edge if this node offers a higher critical time
const int64_t proposed_time = e->critical_time() + in->run_time_ms_;
if (proposed_time > in->critical_time()) {
in->set_critical_time(proposed_time);
if (!seen_edge(in)) {
breadthFirstEdges.push(in);
}
}
}
}
}

void Plan::Dump() const {
printf("pending: %d\n", (int)want_.size());
for (map<Edge*, Want>::const_iterator e = want_.begin(); e != want_.end(); ++e) {
Expand Down Expand Up @@ -574,6 +730,8 @@ bool Builder::AlreadyUpToDate() const {
bool Builder::Build(string* err) {
assert(!AlreadyUpToDate());

plan_.ComputeCriticalTime(scan_.build_log());

status_->PlanHasTotalEdges(plan_.command_edge_count());
int pending_commands = 0;
int failures_allowed = config_.failures_allowed;
Expand Down
65 changes: 53 additions & 12 deletions src/build.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#define NINJA_BUILD_H_

#include <cstdio>
#include <queue>
#include <map>
#include <memory>
#include <queue>
Expand All @@ -35,6 +36,42 @@ struct Node;
struct State;
struct Status;


// Set of ready edges, sorted by priority
class EdgeQueue {
struct EdgePriorityCompare {
bool operator()(const Edge* e1, const Edge* e2) const;
};

std::priority_queue<Edge*, std::vector<Edge*>, EdgePriorityCompare> queue_;

public:
void push(Edge* edge) {
queue_.push(edge);
}

template <typename It>
void push(It first, It last) {
for (; first != last; ++first) {
push(*first);
}
}

Edge* pop() {
Edge* ret = queue_.top();
queue_.pop();
return ret;
}

void clear() {
queue_ = std::priority_queue<Edge*, std::vector<Edge*>, EdgePriorityCompare>();
}

size_t size() const { return queue_.size(); }

bool empty() const { return queue_.empty(); }
};

/// Plan stores the state of a build plan: what we intend to build,
/// which steps we're ready to execute.
struct Plan {
Expand Down Expand Up @@ -75,22 +112,12 @@ struct Plan {

/// Reset state. Clears want and ready sets.
void Reset();
void ComputeCriticalTime(BuildLog* build_log);

/// Update the build plan to account for modifications made to the graph
/// by information loaded from a dyndep file.
bool DyndepsLoaded(DependencyScan* scan, const Node* node,
const DyndepFile& ddf, std::string* err);
private:
bool RefreshDyndepDependents(DependencyScan* scan, const Node* node, std::string* err);
void UnmarkDependents(const Node* node, std::set<Node*>* dependents);
bool AddSubTarget(const Node* node, const Node* dependent, std::string* err,
std::set<Edge*>* dyndep_walk);

/// Update plan with knowledge that the given node is up to date.
/// If the node is a dyndep binding on any of its dependents, this
/// loads dynamic dependencies from the node's path.
/// Returns 'false' if loading dyndep info fails and 'true' otherwise.
bool NodeFinished(Node* node, std::string* err);

/// Enumerate possible steps we want for an edge.
enum Want
Expand All @@ -105,6 +132,18 @@ struct Plan {
kWantToFinish
};

private:
bool RefreshDyndepDependents(DependencyScan* scan, const Node* node, std::string* err);
void UnmarkDependents(const Node* node, std::set<Node*>* dependents);
bool AddSubTarget(const Node* node, const Node* dependent, std::string* err,
std::set<Edge*>* dyndep_walk);

/// Update plan with knowledge that the given node is up to date.
/// If the node is a dyndep binding on any of its dependents, this
/// loads dynamic dependencies from the node's path.
/// Returns 'false' if loading dyndep info fails and 'true' otherwise.
bool NodeFinished(Node* node, std::string* err);

void EdgeWanted(const Edge* edge);
bool EdgeMaybeReady(std::map<Edge*, Want>::iterator want_e, std::string* err);

Expand All @@ -119,9 +158,11 @@ struct Plan {
/// we want for the edge.
std::map<Edge*, Want> want_;

EdgeSet ready_;
EdgeQueue ready_;

Builder* builder_;
/// user provided targets in build order, earlier one have higher priority
std::vector<const Node*> targets_;

/// Total number of edges that have commands (not phony).
int command_edges_;
Expand Down
14 changes: 11 additions & 3 deletions src/graph.h
Original file line number Diff line number Diff line change
Expand Up @@ -146,9 +146,10 @@ struct Edge {

Edge()
: rule_(NULL), pool_(NULL), dyndep_(NULL), env_(NULL), mark_(VisitNone),
id_(0), outputs_ready_(false), deps_loaded_(false),
deps_missing_(false), generated_by_dep_loader_(false),
implicit_deps_(0), order_only_deps_(0), implicit_outs_(0) {}
id_(0), run_time_ms_(0), critical_time_(-1), outputs_ready_(false),
deps_loaded_(false), deps_missing_(false),
generated_by_dep_loader_(false), implicit_deps_(0), order_only_deps_(0),
implicit_outs_(0) {}

/// Return true if all inputs' in-edges are ready.
bool AllInputsReady() const;
Expand All @@ -171,6 +172,11 @@ struct Edge {

void Dump(const char* prefix="") const;

int64_t critical_time() const { return critical_time_; }
void set_critical_time(int64_t critical_time) {
critical_time_ = critical_time;
}

const Rule* rule_;
Pool* pool_;
std::vector<Node*> inputs_;
Expand All @@ -179,6 +185,8 @@ struct Edge {
BindingEnv* env_;
VisitMark mark_;
size_t id_;
int64_t run_time_ms_;
int64_t critical_time_;
bool outputs_ready_;
bool deps_loaded_;
bool deps_missing_;
Expand Down