Skip to content

Commit 4f20618

Browse files
agray3 authored and nopperl committed
Reset schedule earlier to allow overlap with ggml graph computation on device (ggml-org#6933)
* Reset schedule earlier to allow overlap with graph computation on device
1 parent 024227d commit 4f20618

File tree

2 files changed

+11
-5
lines changed

2 files changed

+11
-5
lines changed

ggml-backend.c

+7 -5
Original file line number | Diff line number | Diff line change
@@ -1784,12 +1784,14 @@ void ggml_backend_sched_free(ggml_backend_sched_t sched) {
17841784

17851785
void ggml_backend_sched_reset(ggml_backend_sched_t sched) {
17861786
// reset state for the next run
1787-
size_t hash_size = sched->hash_set.size;
1788-
memset(sched->hash_set.keys, 0, sizeof(sched->hash_set.keys[0]) * hash_size); // NOLINT
1789-
memset(sched->tensor_backend_id, -1, sizeof(sched->tensor_backend_id[0]) * hash_size);
1790-
memset(sched->tensor_copies, 0, sizeof(sched->tensor_copies[0]) * hash_size);
1787+
if (!sched->is_reset) {
1788+
size_t hash_size = sched->hash_set.size;
1789+
memset(sched->hash_set.keys, 0, sizeof(sched->hash_set.keys[0]) * hash_size); // NOLINT
1790+
memset(sched->tensor_backend_id, -1, sizeof(sched->tensor_backend_id[0]) * hash_size);
1791+
memset(sched->tensor_copies, 0, sizeof(sched->tensor_copies[0]) * hash_size);
17911792

1792-
sched->is_reset = true;
1793+
sched->is_reset = true;
1794+
}
17931795
sched->is_alloc = false;
17941796
}
17951797

llama.cpp

+4
Original file line number | Diff line number | Diff line change
@@ -11473,6 +11473,10 @@ static int llama_decode_internal(
1147311473
}
1147411474
}
1147511475

11476+
// Reset state for the next token before backend sync, to allow the CPU activities in the reset to
11477+
// overlap with device computation.
11478+
ggml_backend_sched_reset(lctx.sched);
11479+
1147611480
return 0;
1147711481
}
1147811482

0 commit comments

Comments
 (0)