diff --git a/docs/reference/api-reference.md b/docs/reference/api-reference.md index b6b063bfd..532eba570 100644 --- a/docs/reference/api-reference.md +++ b/docs/reference/api-reference.md @@ -7552,23 +7552,6 @@ client.inference.get({ ... }) - **`task_type` (Optional, Enum("sparse_embedding" | "text_embedding" | "rerank" | "completion" | "chat_completion"))**: The task type - **`inference_id` (Optional, string)**: The inference Id -## client.inference.postEisChatCompletion [_inference.post_eis_chat_completion] -Perform a chat completion task through the Elastic Inference Service (EIS). - -Perform a chat completion inference task with the `elastic` service. - -[Endpoint documentation](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-post-eis-chat-completion) - -```ts -client.inference.postEisChatCompletion({ eis_inference_id }) -``` - -### Arguments [_arguments_inference.post_eis_chat_completion] - -#### Request (object) [_request_inference.post_eis_chat_completion] -- **`eis_inference_id` (string)**: The unique identifier of the inference endpoint. -- **`chat_completion_request` (Optional, { messages, model, max_completion_tokens, stop, temperature, tool_choice, tools, top_p })** - ## client.inference.put [_inference.put] Create an inference endpoint. When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. @@ -7775,26 +7758,6 @@ These settings are specific to the `cohere` service. - **`task_settings` (Optional, { input_type, return_documents, top_n, truncate })**: Settings to configure the inference task. These settings are specific to the task type you specified. -## client.inference.putEis [_inference.put_eis] -Create an Elastic Inference Service (EIS) inference endpoint. - -Create an inference endpoint to perform an inference task through the Elastic Inference Service (EIS). 
- -[Endpoint documentation](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-eis) - -```ts -client.inference.putEis({ task_type, eis_inference_id, service, service_settings }) -``` - -### Arguments [_arguments_inference.put_eis] - -#### Request (object) [_request_inference.put_eis] -- **`task_type` (Enum("chat_completion"))**: The type of the inference task that the model will perform. -NOTE: The `chat_completion` task type only supports streaming and only through the _stream API. -- **`eis_inference_id` (string)**: The unique identifier of the inference endpoint. -- **`service` (Enum("elastic"))**: The type of service supported for the specified task type. In this case, `elastic`. -- **`service_settings` ({ model_id, rate_limit })**: Settings used to install the inference model. These settings are specific to the `elastic` service. - ## client.inference.putElasticsearch [_inference.put_elasticsearch] Create an Elasticsearch inference endpoint. diff --git a/src/api/api/inference.ts b/src/api/api/inference.ts index 6d3d64e2e..4df7b4e65 100644 --- a/src/api/api/inference.ts +++ b/src/api/api/inference.ts @@ -77,15 +77,6 @@ export default class Inference { body: [], query: [] }, - 'inference.post_eis_chat_completion': { - path: [ - 'eis_inference_id' - ], - body: [ - 'chat_completion_request' - ], - query: [] - }, 'inference.put': { path: [ 'task_type', @@ -174,17 +165,6 @@ export default class Inference { ], query: [] }, - 'inference.put_eis': { - path: [ - 'task_type', - 'eis_inference_id' - ], - body: [ - 'service', - 'service_settings' - ], - query: [] - }, 'inference.put_elasticsearch': { path: [ 'task_type', @@ -583,53 +563,6 @@ export default class Inference { return await this.transport.request({ path, method, querystring, body, meta }, options) } - /** - * Perform a chat completion task through the Elastic Inference Service (EIS). Perform a chat completion inference task with the `elastic` service. 
- * @see {@link https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-post-eis-chat-completion | Elasticsearch API documentation} - */ - async postEisChatCompletion (this: That, params: T.InferencePostEisChatCompletionRequest, options?: TransportRequestOptionsWithOutMeta): Promise - async postEisChatCompletion (this: That, params: T.InferencePostEisChatCompletionRequest, options?: TransportRequestOptionsWithMeta): Promise> - async postEisChatCompletion (this: That, params: T.InferencePostEisChatCompletionRequest, options?: TransportRequestOptions): Promise - async postEisChatCompletion (this: That, params: T.InferencePostEisChatCompletionRequest, options?: TransportRequestOptions): Promise { - const { - path: acceptedPath, - body: acceptedBody, - query: acceptedQuery - } = this.acceptedParams['inference.post_eis_chat_completion'] - - const userQuery = params?.querystring - const querystring: Record = userQuery != null ? { ...userQuery } : {} - - let body: any = params.body ?? undefined - for (const key in params) { - if (acceptedBody.includes(key)) { - // @ts-expect-error - body = params[key] - } else if (acceptedPath.includes(key)) { - continue - } else if (key !== 'body' && key !== 'querystring') { - if (acceptedQuery.includes(key) || commonQueryParams.includes(key)) { - // @ts-expect-error - querystring[key] = params[key] - } else { - body = body ?? {} - // @ts-expect-error - body[key] = params[key] - } - } - } - - const method = 'POST' - const path = `/_inference/chat_completion/${encodeURIComponent(params.eis_inference_id.toString())}/_stream` - const meta: TransportRequestMetadata = { - name: 'inference.post_eis_chat_completion', - pathParts: { - eis_inference_id: params.eis_inference_id - } - } - return await this.transport.request({ path, method, querystring, body, meta }, options) - } - /** * Create an inference endpoint. 
When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. After creating the endpoint, wait for the model deployment to complete before using it. To verify the deployment status, use the get trained model statistics API. Look for `"state": "fully_allocated"` in the response and ensure that the `"allocation_count"` matches the `"target_allocation_count"`. Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources. IMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Mistral, Azure OpenAI, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face. For built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs. * @see {@link https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put | Elasticsearch API documentation} @@ -1033,64 +966,6 @@ export default class Inference { return await this.transport.request({ path, method, querystring, body, meta }, options) } - /** - * Create an Elastic Inference Service (EIS) inference endpoint. Create an inference endpoint to perform an inference task through the Elastic Inference Service (EIS). 
- * @see {@link https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-eis | Elasticsearch API documentation} - */ - async putEis (this: That, params: T.InferencePutEisRequest, options?: TransportRequestOptionsWithOutMeta): Promise - async putEis (this: That, params: T.InferencePutEisRequest, options?: TransportRequestOptionsWithMeta): Promise> - async putEis (this: That, params: T.InferencePutEisRequest, options?: TransportRequestOptions): Promise - async putEis (this: That, params: T.InferencePutEisRequest, options?: TransportRequestOptions): Promise { - const { - path: acceptedPath, - body: acceptedBody, - query: acceptedQuery - } = this.acceptedParams['inference.put_eis'] - - const userQuery = params?.querystring - const querystring: Record = userQuery != null ? { ...userQuery } : {} - - let body: Record | string | undefined - const userBody = params?.body - if (userBody != null) { - if (typeof userBody === 'string') { - body = userBody - } else { - body = { ...userBody } - } - } - - for (const key in params) { - if (acceptedBody.includes(key)) { - body = body ?? {} - // @ts-expect-error - body[key] = params[key] - } else if (acceptedPath.includes(key)) { - continue - } else if (key !== 'body' && key !== 'querystring') { - if (acceptedQuery.includes(key) || commonQueryParams.includes(key)) { - // @ts-expect-error - querystring[key] = params[key] - } else { - body = body ?? {} - // @ts-expect-error - body[key] = params[key] - } - } - } - - const method = 'PUT' - const path = `/_inference/${encodeURIComponent(params.task_type.toString())}/${encodeURIComponent(params.eis_inference_id.toString())}` - const meta: TransportRequestMetadata = { - name: 'inference.put_eis', - pathParts: { - task_type: params.task_type, - eis_inference_id: params.eis_inference_id - } - } - return await this.transport.request({ path, method, querystring, body, meta }, options) - } - /** * Create an Elasticsearch inference endpoint. 
Create an inference endpoint to perform an inference task with the `elasticsearch` service. > info > Your Elasticsearch deployment contains preconfigured ELSER and E5 inference endpoints, you only need to create the enpoints using the API if you want to customize the settings. If you use the ELSER or the E5 model through the `elasticsearch` service, the API request will automatically download and deploy the model if it isn't downloaded yet. > info > You might see a 502 bad gateway error in the response when using the Kibana Console. This error usually just reflects a timeout, while the model downloads in the background. You can check the download progress in the Machine Learning UI. If using the Python client, you can set the timeout parameter to a higher value. After creating the endpoint, wait for the model deployment to complete before using it. To verify the deployment status, use the get trained model statistics API. Look for `"state": "fully_allocated"` in the response and ensure that the `"allocation_count"` matches the `"target_allocation_count"`. Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources. * @see {@link https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-elasticsearch | Elasticsearch API documentation} diff --git a/src/api/types.ts b/src/api/types.ts index 22c0302f6..6f8720570 100644 --- a/src/api/types.ts +++ b/src/api/types.ts @@ -15503,8 +15503,10 @@ export interface ClusterHealthHealthResponseBody { active_primary_shards: integer /** The total number of active primary and replica shards. */ active_shards: integer + /** The ratio of active shards in the cluster expressed as a string formatted percentage. */ + active_shards_percent?: string /** The ratio of active shards in the cluster expressed as a percentage. */ - active_shards_percent_as_number: Percentage + active_shards_percent_as_number: double /** The name of the cluster. 
*/ cluster_name: Name /** The number of shards whose allocation has been delayed by the timeout settings. */ @@ -15566,7 +15568,7 @@ export interface ClusterHealthRequest extends RequestBase { /** Can be one of immediate, urgent, high, normal, low, languid. Wait until all currently queued events with the given priority are processed. */ wait_for_events?: WaitForEvents /** The request waits until the specified number N of nodes is available. It also accepts >=N, <=N, >N and /** Contains the inference type and its options. */ inference_config?: IngestInferenceConfig + /** Input fields for inference and output (destination) fields for the inference results. + * This option is incompatible with the target_field and field_map options. */ + input_output?: IngestInputConfig | IngestInputConfig[] + /** If true and any of the input fields defined in input_output are missing + * then those missing fields are quietly ignored, otherwise a missing field causes a failure. + * Only applies when using input_output configurations to explicitly list the input fields. */ + ignore_missing?: boolean } export interface IngestIngest { @@ -23052,6 +23021,11 @@ export interface IngestIngest { pipeline?: Name } +export interface IngestInputConfig { + input_field: string + output_field: string +} + export interface IngestIpLocationProcessor extends IngestProcessorBase { /** The database filename referring to a database the module ships with (GeoLite2-City.mmdb, GeoLite2-Country.mmdb, or GeoLite2-ASN.mmdb) or a custom database in the ingest-geoip config directory. */ database_file?: string