@@ -683,7 +683,7 @@ struct cmd_params_instance {
     bool cpu_strict;
     int poll;
     int n_gpu_layers;
-    std::string rpc_servers;
+    std::string rpc_servers_str;
     llama_split_mode split_mode;
     int main_gpu;
     bool no_kv_offload;
@@ -696,8 +696,37 @@ struct cmd_params_instance {
         llama_model_params mparams = llama_model_default_params();

         mparams.n_gpu_layers = n_gpu_layers;
-        if (!rpc_servers.empty()) {
-            mparams.rpc_servers = rpc_servers.c_str();
+        if (!rpc_servers_str.empty()) {
+            auto rpc_servers = string_split<std::string>(rpc_servers_str, ',');
+
+            // add RPC devices
+            if (!rpc_servers.empty()) {
+                ggml_backend_reg_t rpc_reg = ggml_backend_reg_by_name("RPC");
+                if (!rpc_reg) {
+                    fprintf(stderr, "%s: failed to find RPC backend\n", __func__);
+                    exit(1);
+                }
+
+                typedef ggml_backend_dev_t (*ggml_backend_rpc_add_device_t)(const char * endpoint);
+                ggml_backend_rpc_add_device_t ggml_backend_rpc_add_device_fn = (ggml_backend_rpc_add_device_t) ggml_backend_reg_get_proc_address(rpc_reg, "ggml_backend_rpc_add_device");
+                if (!ggml_backend_rpc_add_device_fn) {
+                    fprintf(stderr, "%s: failed to find RPC device add function\n", __func__);
+                    exit(1);
+                }
+                static std::vector<ggml_backend_dev_t> devices;
+                devices.clear();
+                for (const std::string & server : rpc_servers) {
+                    ggml_backend_dev_t dev = ggml_backend_rpc_add_device_fn(server.c_str());
+                    if (dev) {
+                        devices.push_back(dev);
+                    } else {
+                        fprintf(stderr, "%s: failed to add RPC device for server '%s'\n", __func__, server.c_str());
+                        exit(1);
+                    }
+                }
+                devices.push_back(nullptr);
+                mparams.devices = devices.data();
+            }
         }
         mparams.split_mode = split_mode;
         mparams.main_gpu = main_gpu;
@@ -708,7 +737,7 @@ struct cmd_params_instance {
     }

     bool equal_mparams(const cmd_params_instance & other) const {
-        return model == other.model && n_gpu_layers == other.n_gpu_layers && rpc_servers == other.rpc_servers &&
+        return model == other.model && n_gpu_layers == other.n_gpu_layers && rpc_servers_str == other.rpc_servers_str &&
               split_mode == other.split_mode && main_gpu == other.main_gpu && use_mmap == other.use_mmap &&
               tensor_split == other.tensor_split;
    }
0 commit comments