@@ -5672,6 +5672,48 @@ static bool ggml_vk_build_graph(ggml_backend_vk_context * ctx, ggml_tensor * node
         } else {
             compute_ctx = ctx->compute_ctx.lock();
         }
+    } else {
+        switch (node->op) {
+        case GGML_OP_REPEAT:
+        case GGML_OP_ACC:
+        case GGML_OP_GET_ROWS:
+        case GGML_OP_ADD:
+        case GGML_OP_MUL:
+        case GGML_OP_DIV:
+        case GGML_OP_CONCAT:
+        case GGML_OP_UPSCALE:
+        case GGML_OP_SCALE:
+        case GGML_OP_SQR:
+        case GGML_OP_SIN:
+        case GGML_OP_COS:
+        case GGML_OP_CLAMP:
+        case GGML_OP_PAD:
+        case GGML_OP_CPY:
+        case GGML_OP_CONT:
+        case GGML_OP_DUP:
+        case GGML_OP_NORM:
+        case GGML_OP_GROUP_NORM:
+        case GGML_OP_RMS_NORM:
+        case GGML_OP_UNARY:
+        case GGML_OP_DIAG_MASK_INF:
+        case GGML_OP_SOFT_MAX:
+        case GGML_OP_ROPE:
+        case GGML_OP_ARGSORT:
+        case GGML_OP_SUM_ROWS:
+        case GGML_OP_IM2COL:
+        case GGML_OP_TIMESTEP_EMBEDDING:
+        case GGML_OP_POOL_2D:
+        case GGML_OP_LEAKY_RELU:
+            {
+                // These operations all go through ggml_vk_op_f32, so short-circuit and
+                // do the only thing needed for the dryrun.
+                vk_pipeline pipeline = ggml_vk_op_get_pipeline(ctx, src0, src1, src2, node, node->op);
+                ggml_pipeline_request_descriptor_sets(ctx->device, pipeline, 1);
+                return false;
+            }
+        default:
+            break;
+        }
     }

     switch (node->op) {
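The early return above relies on ggml_vk_build_graph being called twice per graph: once as a "dryrun" that only accounts for resources, and once to record the actual commands. Below is a simplified, hypothetical sketch of that two-pass pattern; the types and helpers are invented for the example, and only the ggml function names quoted from the diff are real. The point it illustrates: for ops that funnel through ggml_vk_op_f32, the dryrun pass needs nothing beyond requesting one descriptor set for the op's pipeline, so everything else can be skipped.

```cpp
// Simplified sketch of the dryrun pattern (hypothetical types and helpers,
// not the actual ggml-vulkan internals). During the dryrun pass, the only
// side effect a ggml_vk_op_f32-style node needs is to request one descriptor
// set for its pipeline, so the build function can return immediately.
#include <string>
#include <unordered_map>

struct Pipeline {
    std::string name;
};

struct Context {
    // Analogue of the per-pipeline counters that
    // ggml_pipeline_request_descriptor_sets() bumps in the real backend.
    std::unordered_map<Pipeline *, int> descriptor_sets_requested;
};

static void request_descriptor_sets(Context &ctx, Pipeline *pipeline, int n) {
    ctx.descriptor_sets_requested[pipeline] += n;
}

// Stand-in for building one simple element-wise node: during a dryrun,
// request a single descriptor set and bail out.
static bool build_node(Context &ctx, Pipeline *pipeline, bool dryrun) {
    if (dryrun) {
        request_descriptor_sets(ctx, pipeline, 1);
        return false;   // nothing was enqueued
    }
    // ... real command recording would happen here ...
    return true;
}

int main() {
    Context ctx;
    Pipeline add_pipeline{"add_f32"};
    build_node(ctx, &add_pipeline, /*dryrun=*/true);
    // ctx.descriptor_sets_requested[&add_pipeline] is now 1.
}
```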
@@ -6401,16 +6443,17 @@ static ggml_status ggml_backend_vk_graph_compute(ggml_backend_t backend, ggml_cgraph * cgraph
     bool first_node_in_batch = true; // true if next node will be first node in a batch
     int submit_node_idx = 0; // index to first node in a batch

-    // submit work every submit_count node to overlap CPU cmdbuffer generation with GPU execution
-    constexpr int submit_count = 100;
+    // Submit work every nodes_per_submit nodes to overlap CPU cmdbuffer generation with GPU execution.
+    // Start with a smaller count to get work submitted right away, and increase it after each submit.
+    int nodes_per_submit = 20;
     int submitted_nodes = 0;
+    int submit_count = 0;
     for (int i = 0; i < cgraph->n_nodes; i++) {
         if (first_node_in_batch) {
             submit_node_idx = i;
         }

-        bool submit = (submitted_nodes >= submit_count) || (i == last_node);
-
+        bool submit = (submitted_nodes >= nodes_per_submit) || (i == last_node);

         bool enqueued = ggml_vk_build_graph(ctx, cgraph->nodes[i], i, cgraph->nodes[submit_node_idx], submit_node_idx, false, i == last_node, submit);
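The comment in this hunk states the motivation: CPU-side command buffer recording and GPU execution can overlap if the graph is submitted in batches rather than all at once. Here is a minimal standalone sketch of that batching loop; Node, record_node, and submit_batch are hypothetical stand-ins, not the ggml Vulkan API.

```cpp
// Minimal sketch of the batching idea (hypothetical types and helpers): the
// CPU records nodes into the current batch and submits every nodes_per_submit
// nodes, so the GPU can start executing earlier batches while later ones are
// still being recorded.
#include <cstdio>
#include <vector>

struct Node { int id; };

static void record_node(const Node &n) { std::printf("record node %d\n", n.id); }
static void submit_batch()             { std::printf("submit batch\n"); }

static void run_graph(const std::vector<Node> &nodes, int nodes_per_submit) {
    int submitted_nodes = 0;
    for (size_t i = 0; i < nodes.size(); ++i) {
        record_node(nodes[i]);                       // CPU-side command recording
        bool last = (i + 1 == nodes.size());
        if (++submitted_nodes >= nodes_per_submit || last) {
            submit_batch();                          // GPU can begin this batch now
            submitted_nodes = 0;
        }
    }
}

int main() {
    std::vector<Node> nodes;
    for (int i = 0; i < 7; ++i) {
        nodes.push_back({i});
    }
    run_graph(nodes, 3);   // submits after nodes 2, 5, and the final node 6
}
```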
@@ -6427,6 +6470,15 @@ static ggml_status ggml_backend_vk_graph_compute(ggml_backend_t backend, ggml_cgraph * cgraph
         if (submit) {
             first_node_in_batch = true;
             submitted_nodes = 0;
+            switch (submit_count) {
+            case 0:
+                nodes_per_submit = 50;
+                break;
+            default:
+                nodes_per_submit = 100;
+                break;
+            }
+            submit_count++;
         }
     }
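To make the new cadence concrete, this is a small standalone illustration (not backend code) of how nodes_per_submit evolves across submits with the switch added above: the first submit comes after 20 nodes so the GPU gets work quickly, the second after 50, and every later one after 100.

```cpp
// Standalone illustration of the nodes_per_submit ramp from the hunk above.
#include <cstdio>

int main() {
    int nodes_per_submit = 20;   // initial, small batch
    int submit_count = 0;

    for (int batch = 0; batch < 4; ++batch) {
        std::printf("batch %d submits after %d nodes\n", batch, nodes_per_submit);

        // Same ramp as the diff: grow the batch size after each submit.
        switch (submit_count) {
        case 0:
            nodes_per_submit = 50;
            break;
        default:
            nodes_per_submit = 100;
            break;
        }
        submit_count++;
    }
    // Prints batch sizes 20, 50, 100, 100.
}
```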