File tree 2 files changed +11
-5
lines changed
2 files changed +11
-5
lines changed Original file line number Diff line number Diff line change @@ -1784,12 +1784,14 @@ void ggml_backend_sched_free(ggml_backend_sched_t sched) {
1784
1784
1785
1785
void ggml_backend_sched_reset (ggml_backend_sched_t sched ) {
1786
1786
// reset state for the next run
1787
- size_t hash_size = sched -> hash_set .size ;
1788
- memset (sched -> hash_set .keys , 0 , sizeof (sched -> hash_set .keys [0 ]) * hash_size ); // NOLINT
1789
- memset (sched -> tensor_backend_id , -1 , sizeof (sched -> tensor_backend_id [0 ]) * hash_size );
1790
- memset (sched -> tensor_copies , 0 , sizeof (sched -> tensor_copies [0 ]) * hash_size );
1787
+ if (!sched -> is_reset ) {
1788
+ size_t hash_size = sched -> hash_set .size ;
1789
+ memset (sched -> hash_set .keys , 0 , sizeof (sched -> hash_set .keys [0 ]) * hash_size ); // NOLINT
1790
+ memset (sched -> tensor_backend_id , -1 , sizeof (sched -> tensor_backend_id [0 ]) * hash_size );
1791
+ memset (sched -> tensor_copies , 0 , sizeof (sched -> tensor_copies [0 ]) * hash_size );
1791
1792
1792
- sched -> is_reset = true;
1793
+ sched -> is_reset = true;
1794
+ }
1793
1795
sched -> is_alloc = false;
1794
1796
}
1795
1797
Original file line number Diff line number Diff line change @@ -11473,6 +11473,10 @@ static int llama_decode_internal(
11473
11473
}
11474
11474
}
11475
11475
11476
+ // Reset state for the next token before backend sync, to allow the CPU activities in the reset to
11477
+ // overlap with device computation.
11478
+ ggml_backend_sched_reset(lctx.sched);
11479
+
11476
11480
return 0;
11477
11481
}
11478
11482
You can’t perform that action at this time.
0 commit comments