diff --git a/engine/config/yaml_config.cc b/engine/config/yaml_config.cc
index 9650ffdcc..38128e1c4 100644
--- a/engine/config/yaml_config.cc
+++ b/engine/config/yaml_config.cc
@@ -48,7 +48,7 @@ void YamlHandler::ReadYamlFile(const std::string& file_path) {
       if (!yaml_node_["mmproj"]) {
         auto s = nomalize_path(file_path);
         auto abs_path = s.substr(0, s.find_last_of('/')) + "/mmproj.gguf";
-        CTL_DBG("mmproj: " << abs_path);
+        CTL_TRC("mmproj: " << abs_path);
         auto rel_path = fmu::ToRelativeCortexDataPath(fs::path(abs_path));
         if (std::filesystem::exists(abs_path)) {
           yaml_node_["mmproj"] = rel_path.string();
diff --git a/engine/e2e-test/cli/engines/test_cli_engine_uninstall.py b/engine/e2e-test/cli/engines/test_cli_engine_uninstall.py
index 8672110e2..3198c81a5 100644
--- a/engine/e2e-test/cli/engines/test_cli_engine_uninstall.py
+++ b/engine/e2e-test/cli/engines/test_cli_engine_uninstall.py
@@ -24,7 +24,10 @@ def setup_and_teardown(self):
 
     @pytest.mark.asyncio
     async def test_engines_uninstall_llamacpp_should_be_successfully(self):
-        response = requests.post("http://localhost:3928/v1/engines/llama-cpp/install")
+        data = {"version": "b5371"}
+        response = requests.post(
+            "http://localhost:3928/v1/engines/llama-cpp/install", json=data
+        )
         await wait_for_websocket_download_success_event(timeout=None)
         exit_code, output, error = run(
             "Uninstall engine", ["engines", "uninstall", "llama-cpp"]
diff --git a/engine/e2e-test/cli/model/test_cli_model.py b/engine/e2e-test/cli/model/test_cli_model.py
index aa6e99e4a..cd80a9e2b 100644
--- a/engine/e2e-test/cli/model/test_cli_model.py
+++ b/engine/e2e-test/cli/model/test_cli_model.py
@@ -36,6 +36,7 @@ def setup_and_teardown(self):
         run("Delete model", ["models", "delete", "tinyllama:1b"])
         stop_server()
 
+    @pytest.mark.skipif(platform.system() == "Windows", reason="Skip test for Windows")
     def test_model_pull_with_direct_url_should_be_success(self):
         exit_code, output, error = run(
             "Pull model",
diff --git a/engine/extensions/local-engine/local_engine.cc b/engine/extensions/local-engine/local_engine.cc
index b769c5e8c..2bba11a7b 100644
--- a/engine/extensions/local-engine/local_engine.cc
+++ b/engine/extensions/local-engine/local_engine.cc
@@ -80,6 +80,11 @@ std::vector<std::string> ConvertJsonToParamsVector(const Json::Value& root) {
         res.push_back("--no-mmap");
       }
       continue;
+    } else if (member == "ignore_eos") {
+      if (root[member].asBool()) {
+        res.push_back("--ignore_eos");
+      }
+      continue;
     }
 
     res.push_back("--" + member);
@@ -502,6 +507,23 @@ void LocalEngine::HandleEmbedding(std::shared_ptr<Json::Value> json_body,
 
 void LocalEngine::LoadModel(std::shared_ptr<Json::Value> json_body,
                             http_callback&& callback) {
+  auto model_id = json_body->get("model", "").asString();
+  if (model_id.empty()) {
+    CTL_WRN("Model is empty");
+  }
+  if (server_map_.find(model_id) != server_map_.end()) {
+    CTL_INF("Model " << model_id << " is already loaded");
+    Json::Value error;
+    error["error"] = "Model " + model_id + " is already loaded";
+    Json::Value status;
+    status["is_done"] = true;
+    status["has_error"] = true;
+    status["is_stream"] = false;
+    status["status_code"] = 409;
+    callback(std::move(status), std::move(error));
+    return;
+  }
+
   CTL_INF("Start loading model");
   auto wait_for_server_up = [this](const std::string& model,
                                    const std::string& host, int port) {
@@ -524,10 +546,7 @@ void LocalEngine::LoadModel(std::shared_ptr<Json::Value> json_body,
   };
 
   LOG_DEBUG << "Start to spawn llama-server";
-  auto model_id = json_body->get("model", "").asString();
-  if (model_id.empty()) {
-    CTL_WRN("Model is empty");
-  }
+
   server_map_[model_id].host = "127.0.0.1";
   server_map_[model_id].port = GenerateRandomInteger(39400, 39999);
   auto& s = server_map_[model_id];
@@ -545,6 +564,8 @@ void LocalEngine::LoadModel(std::shared_ptr<Json::Value> json_body,
     params.push_back("--pooling");
     params.push_back("mean");
 
+  params.push_back("--jinja");
+
   std::vector<std::string> v;
   v.reserve(params.size() + 1);
   auto engine_dir = engine_service_.GetEngineDirPath(kLlamaRepo);
diff --git a/engine/services/model_source_service.cc b/engine/services/model_source_service.cc
index b5979667c..661b9b580 100644
--- a/engine/services/model_source_service.cc
+++ b/engine/services/model_source_service.cc
@@ -433,8 +433,7 @@ cpp::result ModelSourceService::AddCortexsoRepo(
   auto author = hub_author;
   auto model_author = hu::GetModelAuthorCortexsoHub(model_name);
-  if (auto model_author = hu::GetModelAuthorCortexsoHub(model_name);
-      model_author.has_value() && !model_author.value().empty()) {
+  if (model_author.has_value() && !model_author.value().empty()) {
     author = model_author.value();
   }
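
Note (not part of the patch): below is a minimal usage sketch of the duplicate-load guard added to LocalEngine::LoadModel. It assumes the server is running on localhost:3928 (the port used by the e2e tests above) and that models are loaded via POST /v1/models/start with a {"model": ...} body; the route and the model id are illustrative assumptions, not taken from this diff.

# Illustrative only: exercises the new duplicate-load guard in LoadModel.
import requests

BASE = "http://localhost:3928"  # port taken from the e2e tests above
MODEL = "tinyllama:1b"          # hypothetical model id, for illustration

# First load should succeed (model gets an entry in server_map_).
first = requests.post(f"{BASE}/v1/models/start", json={"model": MODEL})
first.raise_for_status()

# A second load of the same model should now short-circuit with the
# 409 payload built in LoadModel instead of spawning another llama-server.
second = requests.post(f"{BASE}/v1/models/start", json={"model": MODEL})
assert second.status_code == 409
assert "already loaded" in second.json().get("error", "")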