-
Notifications
You must be signed in to change notification settings - Fork 1.7k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add critical path scheduler to improve build times #2019
Changes from 9 commits
4af9fc5
12b5b7c
8e23200
2fcf403
c5d355c
63b0a9a
5b8d19b
fe80637
c83167f
77448b4
24d1f5f
1af6daf
6ee9049
1128a56
a861164
026498f
4bd8db1
a643af2
f2333b7
09d4faa
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -75,6 +75,16 @@ bool DryRunCommandRunner::WaitForCommand(Result* result) { | |
|
||
} // namespace | ||
|
||
|
||
// Ordering predicate over edges: compares critical_time() first and, when the
// two critical times are equal, falls back to comparing id_.
// Returns true when e1 orders before e2 under that (critical time, id) key.
bool EdgeQueue::EdgePriorityCompare::operator()(const Edge* e1, const Edge* e2) const {
  const int64_t lhs_time = e1->critical_time();
  const int64_t rhs_time = e2->critical_time();
  // Equal critical times: the id decides; otherwise the critical time does.
  return lhs_time == rhs_time ? e1->id_ < e2->id_ : lhs_time < rhs_time;
}
|
||
Plan::Plan(Builder* builder) | ||
: builder_(builder) | ||
, command_edges_(0) | ||
|
@@ -89,6 +99,7 @@ void Plan::Reset() { | |
} | ||
|
||
// Registers `target` as a requested build target. | ||
// The node is appended to targets_ (the list Plan::ComputeCriticalTime later | ||
// de-duplicates and walks), then the call is forwarded to AddSubTarget with | ||
// NULL for its second and fourth arguments; that call's result is returned. | ||
// `err` is passed through to AddSubTarget unchanged. | ||
bool Plan::AddTarget(const Node* target, string* err) { | ||
targets_.push_back(target); | ||
return AddSubTarget(target, NULL, err, NULL); | ||
} | ||
|
||
|
@@ -151,10 +162,7 @@ void Plan::EdgeWanted(const Edge* edge) { | |
// Hands out the next edge from ready_, or NULL when ready_ is empty. | ||
// NOTE(review): this span is a diff hunk (-151,10 +162,7) shown without +/- | ||
// markers. By the hunk's line counts, the four statements that take | ||
// *ready_.begin() and erase it appear to be the removed lines, and | ||
// `return ready_.pop();` the single added line. Read as plain code, that last | ||
// return would be unreachable - confirm against the real file. | ||
Edge* Plan::FindWork() { | ||
if (ready_.empty()) | ||
return NULL; | ||
EdgeSet::iterator e = ready_.begin(); | ||
Edge* edge = *e; | ||
ready_.erase(e); | ||
return edge; | ||
return ready_.pop(); | ||
} | ||
|
||
void Plan::ScheduleWork(map<Edge*, Want>::iterator want_e) { | ||
|
@@ -172,10 +180,12 @@ void Plan::ScheduleWork(map<Edge*, Want>::iterator want_e) { | |
Pool* pool = edge->pool(); | ||
if (pool->ShouldDelayEdge()) { | ||
pool->DelayEdge(edge); | ||
pool->RetrieveReadyEdges(&ready_); | ||
EdgeSet new_edges; | ||
pool->RetrieveReadyEdges(&new_edges); | ||
ready_.push(new_edges.begin(), new_edges.end()); | ||
} else { | ||
pool->EdgeScheduled(*edge); | ||
ready_.insert(edge); | ||
ready_.push(edge); | ||
} | ||
} | ||
|
||
|
@@ -187,7 +197,9 @@ bool Plan::EdgeFinished(Edge* edge, EdgeResult result, string* err) { | |
// See if this job frees up any delayed jobs. | ||
if (directly_wanted) | ||
edge->pool()->EdgeFinished(*edge); | ||
edge->pool()->RetrieveReadyEdges(&ready_); | ||
EdgeSet new_edges; | ||
edge->pool()->RetrieveReadyEdges(&new_edges); | ||
ready_.push(new_edges.begin(), new_edges.end()); | ||
|
||
// The rest of this function only applies to successful commands. | ||
if (result != kEdgeSucceeded) | ||
|
@@ -424,6 +436,150 @@ void Plan::UnmarkDependents(const Node* node, set<Node*>* dependents) { | |
} | ||
} | ||
|
||
namespace { | ||
|
||
// Unary predicate that reports whether a pointer has already been presented
// to it. The record of seen pointers lives in a caller-owned std::set; the
// predicate only stores a pointer to that set, so copies of the predicate
// (standard algorithms take predicates by value) all share the same record.
template <typename T>
struct SeenBefore {
  std::set<const T*>* seen_;  // Not owned; must outlive this predicate.

  // `seen` receives every item passed to operator(). The constructor is
  // explicit so a bare set pointer never converts into a predicate silently.
  explicit SeenBefore(std::set<const T*>* seen) : seen_(seen) {}

  // Returns true if `item` was already in the set; otherwise inserts it and
  // returns false. Const because only the pointed-to set is modified.
  bool operator() (const T* item) const {
    return !seen_->insert(item).second;
  }
};
|
||
// Assigns run_time_ms_ for every edge that is a key of `want` and returns the | ||
// sum of the run times assigned here. | ||
// - Phony edges are skipped in the first pass: nothing is assigned and nothing | ||
//   is added to the total. NOTE(review): the intended "0 cost" therefore relies | ||
//   on run_time_ms_ already being 0 for them; the second pass does not re-check | ||
//   is_phony(), so a phony edge holding a negative value would receive the | ||
//   estimate - confirm the member's default. | ||
// - Edges whose first output has a build log entry get end_time - start_time. | ||
// - Edges without an entry get the 75th percentile of the durations that were | ||
//   found, or 1 if no edge had an entry at all. | ||
// `build_log` is dereferenced unconditionally, so it must be non-NULL. | ||
// Only outputs_[0] is looked up, so each non-phony edge is assumed to have at | ||
// least one output. | ||
int64_t AssignEdgeRuntime(BuildLog* build_log, | ||
const std::map<Edge*, Plan::Want>& want) { | ||
bool missing_durations = false; | ||
std::vector<int64_t> durations; | ||
int64_t total_time = 0; | ||
|
||
// First pass: take durations from the build log where an entry exists. | ||
for (std::map<Edge*, Plan::Want>::const_iterator it = want.begin(), | ||
end = want.end(); | ||
it != end; ++it) { | ||
Edge* edge = it->first; | ||
if (edge->is_phony()) { | ||
continue; | ||
} | ||
BuildLog::LogEntry* entry = | ||
build_log->LookupByOutput(edge->outputs_[0]->path()); | ||
if (!entry) { | ||
missing_durations = true; | ||
edge->run_time_ms_ = -1; // -1 marks the edge as still needing an estimate | ||
continue; | ||
} | ||
const int64_t duration = entry->end_time - entry->start_time; | ||
edge->run_time_ms_ = duration; | ||
total_time += duration; | ||
durations.push_back(duration); | ||
} | ||
|
||
// Every non-phony edge had history: no estimate is needed. | ||
if (!missing_durations) { | ||
return total_time; | ||
} | ||
|
||
// Heuristic: for unknown edges, take the 75th percentile time. | ||
// This allows the known-slowest jobs to run first, but isn't so | ||
// small that it is always the lowest priority. Which for slow jobs, | ||
// might bottleneck the build. | ||
int64_t p75_time = 1; | ||
int64_t num_durations = static_cast<int64_t>(durations.size()); | ||
if (num_durations > 0) { | ||
// Index counted from the top: skip the largest quarter (rounded down). | ||
// nth_element only partially reorders `durations`, which is enough to | ||
// place the value that a full sort would put at p75_idx. | ||
size_t p75_idx = (num_durations - 1) - num_durations / 4; | ||
std::vector<int64_t>::iterator p75_it = durations.begin() + p75_idx; | ||
std::nth_element(durations.begin(), p75_it, durations.end()); | ||
p75_time = *p75_it; | ||
} | ||
|
||
// Second pass: give the estimate to every edge still marked negative. | ||
for (std::map<Edge*, Plan::Want>::const_iterator it = want.begin(), | ||
end = want.end(); | ||
it != end; ++it) { | ||
Edge* edge = it->first; | ||
if (edge->run_time_ms_ >= 0) { | ||
peterbell10 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
continue; | ||
} | ||
edge->run_time_ms_ = p75_time; | ||
total_time += p75_time; | ||
} | ||
return total_time; | ||
} | ||
|
||
} // namespace | ||
|
||
// Computes a critical time for the in-edges reachable from targets_. | ||
// Steps: | ||
//   1. Return immediately when `build_log` is NULL. | ||
//   2. Drop repeated entries from targets_, keeping the first occurrence. | ||
//   3. Call AssignEdgeRuntime on want_ to fill run_time_ms_ and obtain the | ||
//      serial total. | ||
//   4. Seed each target's in-edge with (reverse index in targets_) * total | ||
//      plus max(run_time_ms_, current critical time). The last target gets | ||
//      weight 0 and the first target the largest weight. | ||
//   5. Run a worklist over edge inputs: an input's in-edge is raised to | ||
//      (consumer critical time + its own run_time_ms_) whenever that is | ||
//      larger than its current value, and is re-queued so the increase | ||
//      propagates further upstream. | ||
// NOTE(review): when several targets share one in-edge, step 4 runs once per | ||
// target and each run builds on the critical time set by the previous one. | ||
void Plan::ComputeCriticalTime(BuildLog* build_log) { | ||
// testcases have no build_log | ||
if (!build_log) | ||
return; | ||
|
||
METRIC_RECORD("ComputePriorityList"); | ||
// Remove duplicate targets | ||
{ | ||
std::set<const Node*> seen; | ||
SeenBefore<Node> seen_before(&seen); | ||
targets_.erase(std::remove_if(targets_.begin(), targets_.end(), seen_before), | ||
targets_.end()); | ||
} | ||
|
||
// total time if building all edges in serial. This value is big | ||
// enough to ensure higher priority target's initial critical time | ||
// is always bigger than lower ones | ||
int64_t total_time = AssignEdgeRuntime(build_log, want_); | ||
|
||
// Use backflow algorithm to compute critical times for all nodes, starting | ||
// from the destination nodes. | ||
// XXX: ignores pools | ||
std::queue<Edge*> breadthFirstEdges; // Queue, for breadth-first traversal | ||
peterbell10 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
std::set<const Edge*> active_edges; // Edges currently queued in breadthFirstEdges | ||
SeenBefore<Edge> seen_edge( | ||
&active_edges); // Test for uniqueness in breadthFirstEdges | ||
|
||
// Seed the queue from the targets, walking targets_ back to front. | ||
for (std::vector<const Node*>::reverse_iterator it = targets_.rbegin(), | ||
end = targets_.rend(); | ||
it != end; ++it) { | ||
if (Edge* in = (*it)->in_edge()) { | ||
// Use initial critical time: total_time * N. This means higher | ||
// priority targets always get a higher critical time value | ||
peterbell10 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
int64_t priority_weight = (it - targets_.rbegin()) * total_time; | ||
in->set_critical_time( | ||
priority_weight + | ||
std::max<int64_t>(in->run_time_ms_, in->critical_time())); | ||
if (!seen_edge(in)) { | ||
breadthFirstEdges.push(in); | ||
} | ||
} | ||
} | ||
|
||
// Propagate towards the inputs until no critical time increases any more. | ||
while (!breadthFirstEdges.empty()) { | ||
Edge* e = breadthFirstEdges.front(); | ||
breadthFirstEdges.pop(); | ||
// No longer queued, so a later increase is allowed to queue it again. | ||
active_edges.erase(e); | ||
peterbell10 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
for (std::vector<Node*>::iterator it = e->inputs_.begin(), | ||
end = e->inputs_.end(); | ||
This comment was marked as abuse.
Sorry, something went wrong. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I assumed C++11 isn't required yet from the code style used elsewhere. Is that correct?
This comment was marked as abuse.
Sorry, something went wrong. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Also means no |
||
it != end; ++it) { | ||
Edge* in = (*it)->in_edge(); | ||
if (!in) { | ||
continue; | ||
} | ||
// Only process edge if this node offers a higher critical time | ||
const int64_t proposed_time = e->critical_time() + in->run_time_ms_; | ||
if (proposed_time > in->critical_time()) { | ||
in->set_critical_time(proposed_time); | ||
if (!seen_edge(in)) { | ||
breadthFirstEdges.push(in); | ||
} | ||
} | ||
} | ||
} | ||
} | ||
|
||
void Plan::Dump() const { | ||
printf("pending: %d\n", (int)want_.size()); | ||
for (map<Edge*, Want>::const_iterator e = want_.begin(); e != want_.end(); ++e) { | ||
|
@@ -574,6 +730,8 @@ bool Builder::AlreadyUpToDate() const { | |
bool Builder::Build(string* err) { | ||
assert(!AlreadyUpToDate()); | ||
|
||
plan_.ComputeCriticalTime(scan_.build_log()); | ||
|
||
status_->PlanHasTotalEdges(plan_.command_edge_count()); | ||
int pending_commands = 0; | ||
int failures_allowed = config_.failures_allowed; | ||
|
This comment was marked as abuse.
Sorry, something went wrong.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
graph.h, which defines Edge, isn't included in the header.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You could template it. (And if you really mean for it to be just
Edge*
, you can static_assert that the template is a forward-declared type. As in