Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add critical path scheduler to improve build times #2019

Closed
wants to merge 20 commits into from
Closed
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
131 changes: 124 additions & 7 deletions src/build.cc
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,16 @@ bool DryRunCommandRunner::WaitForCommand(Result* result) {

} // namespace


bool EdgeQueue::EdgePriorityCompare::operator()(const Edge* e1, const Edge* e2) const {

This comment was marked as abuse.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

graph.h which defines Edge isn't included in the header.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You could template it. (And if you really mean for it to be just Edge*, you can static_assert that the template is a forward-declared type. As in

class Edge; // Fwd declare
...
    template <typename EdgeType>
    [[nodiscard]] bool operator()(const EdgeType* e1, const EdgeType* e2) const {
        static_assert(std::is_same_v<EdgeType, Edge>); // Avoiding including `graph.h`.
        ...

const int64_t ct1 = e1->critical_time();
const int64_t ct2 = e2->critical_time();
if (ct1 != ct2) {
return ct1 < ct2;
}
return e1->id_ < e2->id_;
}

Plan::Plan(Builder* builder)
: builder_(builder)
, command_edges_(0)
Expand All @@ -89,6 +99,7 @@ void Plan::Reset() {
}

bool Plan::AddTarget(const Node* target, string* err) {
targets_.push_back(target);
return AddSubTarget(target, NULL, err, NULL);
}

Expand Down Expand Up @@ -151,10 +162,7 @@ void Plan::EdgeWanted(const Edge* edge) {
Edge* Plan::FindWork() {
if (ready_.empty())
return NULL;
EdgeSet::iterator e = ready_.begin();
Edge* edge = *e;
ready_.erase(e);
return edge;
return ready_.pop();
}

void Plan::ScheduleWork(map<Edge*, Want>::iterator want_e) {
Expand All @@ -172,10 +180,12 @@ void Plan::ScheduleWork(map<Edge*, Want>::iterator want_e) {
Pool* pool = edge->pool();
if (pool->ShouldDelayEdge()) {
pool->DelayEdge(edge);
pool->RetrieveReadyEdges(&ready_);
EdgeSet new_edges;
pool->RetrieveReadyEdges(&new_edges);
ready_.push(new_edges.begin(), new_edges.end());
} else {
pool->EdgeScheduled(*edge);
ready_.insert(edge);
ready_.push(edge);
}
}

Expand All @@ -187,7 +197,9 @@ bool Plan::EdgeFinished(Edge* edge, EdgeResult result, string* err) {
// See if this job frees up any delayed jobs.
if (directly_wanted)
edge->pool()->EdgeFinished(*edge);
edge->pool()->RetrieveReadyEdges(&ready_);
EdgeSet new_edges;
edge->pool()->RetrieveReadyEdges(&new_edges);
ready_.push(new_edges.begin(), new_edges.end());

// The rest of this function only applies to successful commands.
if (result != kEdgeSucceeded)
Expand Down Expand Up @@ -424,6 +436,109 @@ void Plan::UnmarkDependents(const Node* node, set<Node*>* dependents) {
}
}

namespace {

struct SeenNodeBefore {
std::set<const Node*> *seen;

bool operator() (const Node* node) {
// Return true if the node has been seen before
return !seen->insert(node).second;
}
};

} // namespace

void Plan::ComputeCriticalTime(BuildLog* build_log) {
// testcases have no build_log
if (!build_log)
return;

METRIC_RECORD("ComputePriorityList");
// Remove duplicate targets
{
std::set<const Node*> seen;
targets_.erase(std::remove_if(targets_.begin(), targets_.end(),
SeenNodeBefore{ &seen }),
targets_.end());
}

// this is total time if building all edges in serial, so this value is big
// enough to ensure higher priority target initial critical time always bigger
// than lower one
int64_t total_time = 0;
// Critical path scheduling.
// 0. Assign costs to all edges, using:
// a) The time the edge needed last time, if available.
// b) The average time this edge type needed, if this edge hasn't run before.
// (not implemented .log entries is not grouped by rule type, and even
// similar rule type may not have same name , for example two compile rule
// with different compile flags)
// c) A fixed cost if this type of edge hasn't run before (0 for phony target,
// 1 for others)
//
for (std::map<Edge*, Want>::iterator it = want_.begin(), end = want_.end();
it != end; ++it) {
Edge* edge = it->first;
if (edge->is_phony()) {
continue;
}
BuildLog::LogEntry* entry =
build_log->LookupByOutput(edge->outputs_[0]->path());
if (!entry) {
edge->run_time_ms_ = 1;
continue;
}
int duration = entry->end_time - entry->start_time;
edge->run_time_ms_ = duration;
}

// Use backflow algorithm to compute critical times for all nodes, starting
// from the destination nodes.
// XXX: ignores pools
std::set<Edge*> active_edges; // All edges in edgesQ (for uniqueness)
std::queue<Edge*> edgesQ; // Queue, for breadth-first traversal

for (std::vector<const Node*>::reverse_iterator it = targets_.rbegin(),
end = targets_.rend();
it != end; ++it) {
if (Edge* in = (*it)->in_edge()) {
// Use initial critical time: total_time * N. This means higher
// priority targets always get a higher critical time value
int64_t priority_weight = (it - targets_.rbegin()) * total_time;
in->set_critical_time(
priority_weight +
std::max<int64_t>(in->run_time_ms_, in->critical_time()));
if (active_edges.insert(in).second) {
edgesQ.push(in);
}
}
}

while (!edgesQ.empty()) {
Edge* e = edgesQ.front();
edgesQ.pop();
active_edges.erase(e);

for (std::vector<Node*>::iterator it = e->inputs_.begin(),
end = e->inputs_.end();

This comment was marked as abuse.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I assumed C++11 isn't required yet from the code style used elsewhere. Is that correct?

This comment was marked as abuse.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also means no std::chrono::duration<int64_t, std::milli> :-(

it != end; ++it) {
Edge* in = (*it)->in_edge();
if (!in) {
continue;
}
// Only process edge if this node offers a higher critical time
const int64_t proposed_time = e->critical_time() + in->run_time_ms_;
if (proposed_time > in->critical_time()) {
in->set_critical_time(proposed_time);
if (active_edges.insert(in).second) {
edgesQ.push(in);
}
}
}
}
}

void Plan::Dump() const {
printf("pending: %d\n", (int)want_.size());
for (map<Edge*, Want>::const_iterator e = want_.begin(); e != want_.end(); ++e) {
Expand Down Expand Up @@ -574,6 +689,8 @@ bool Builder::AlreadyUpToDate() const {
bool Builder::Build(string* err) {
assert(!AlreadyUpToDate());

plan_.ComputeCriticalTime(scan_.build_log());

status_->PlanHasTotalEdges(plan_.command_edge_count());
int pending_commands = 0;
int failures_allowed = config_.failures_allowed;
Expand Down
50 changes: 49 additions & 1 deletion src/build.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#define NINJA_BUILD_H_

#include <cstdio>
#include <queue>
#include <map>
#include <memory>
#include <queue>
Expand All @@ -35,6 +36,50 @@ struct Node;
struct State;
struct Status;


// Set of ready edges, sorted by priority
class EdgeQueue {
struct EdgePriorityCompare {
bool operator()(const Edge* e1, const Edge* e2) const;
};

std::priority_queue<Edge*, std::vector<Edge*>, EdgePriorityCompare> queue_;
// Set to ensure no duplicate entries in ready_
EdgeSet set_;

public:
void push(Edge* edge) {
if (set_.insert(edge).second) {
queue_.push(edge);
}
}

template <typename It>
void push(It first, It last) {
for (; first != last; ++first) {
push(*first);
}
}

Edge* pop() {
Edge* ret = queue_.top();
queue_.pop();
set_.erase(ret);
return ret;
}

void clear() {
set_.clear();
while (!queue_.empty()) {
queue_.pop();
}
}

size_t size() const { return queue_.size(); }

bool empty() const { return queue_.empty(); }
};

/// Plan stores the state of a build plan: what we intend to build,
/// which steps we're ready to execute.
struct Plan {
Expand Down Expand Up @@ -75,6 +120,7 @@ struct Plan {

/// Reset state. Clears want and ready sets.
void Reset();
void ComputeCriticalTime(BuildLog* build_log);

/// Update the build plan to account for modifications made to the graph
/// by information loaded from a dyndep file.
Expand Down Expand Up @@ -119,9 +165,11 @@ struct Plan {
/// we want for the edge.
std::map<Edge*, Want> want_;

EdgeSet ready_;
EdgeQueue ready_;

Builder* builder_;
/// user provided targets in build order, earlier one have higher priority
std::vector<const Node*> targets_;

/// Total number of edges that have commands (not phony).
int command_edges_;
Expand Down
14 changes: 11 additions & 3 deletions src/graph.h
Original file line number Diff line number Diff line change
Expand Up @@ -146,9 +146,10 @@ struct Edge {

Edge()
: rule_(NULL), pool_(NULL), dyndep_(NULL), env_(NULL), mark_(VisitNone),
id_(0), outputs_ready_(false), deps_loaded_(false),
deps_missing_(false), generated_by_dep_loader_(false),
implicit_deps_(0), order_only_deps_(0), implicit_outs_(0) {}
id_(0), run_time_ms_(0), critical_time_(-1), outputs_ready_(false),
deps_loaded_(false), deps_missing_(false),
generated_by_dep_loader_(false), implicit_deps_(0), order_only_deps_(0),
implicit_outs_(0) {}

/// Return true if all inputs' in-edges are ready.
bool AllInputsReady() const;
Expand All @@ -171,6 +172,11 @@ struct Edge {

void Dump(const char* prefix="") const;

int64_t critical_time() const { return critical_time_; }
void set_critical_time(int64_t critical_time) {
critical_time_ = critical_time;
}

const Rule* rule_;
Pool* pool_;
std::vector<Node*> inputs_;
Expand All @@ -179,6 +185,8 @@ struct Edge {
BindingEnv* env_;
VisitMark mark_;
size_t id_;
int run_time_ms_;
int64_t critical_time_;
bool outputs_ready_;
bool deps_loaded_;
bool deps_missing_;
Expand Down