Skip to content

Commit 5435992

Browse files
robertgshaw2-redhat authored and mzusman committed
[2/N] API Server: Avoid ulimit footgun (vllm-project#11530)
1 parent 1f20052 commit 5435992

File tree

3 files changed

+26
-2
lines changed

3 files changed

+26
-2
lines changed

vllm/entrypoints/api_server.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
from vllm.logger import init_logger
2222
from vllm.sampling_params import SamplingParams
2323
from vllm.usage.usage_lib import UsageContext
24-
from vllm.utils import FlexibleArgumentParser, random_uuid
24+
from vllm.utils import FlexibleArgumentParser, random_uuid, set_ulimit
2525
from vllm.version import __version__ as VLLM_VERSION
2626

2727
logger = init_logger("vllm.entrypoints.api_server")
@@ -119,6 +119,8 @@ async def run_server(args: Namespace,
119119
logger.info("vLLM API server version %s", VLLM_VERSION)
120120
logger.info("args: %s", args)
121121

122+
set_ulimit()
123+
122124
app = await init_app(args, llm_engine)
123125
assert engine is not None
124126

vllm/entrypoints/openai/api_server.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@
6868
from vllm.logger import init_logger
6969
from vllm.usage.usage_lib import UsageContext
7070
from vllm.utils import (FlexibleArgumentParser, get_open_zmq_ipc_path,
71-
is_valid_ipv6_address)
71+
is_valid_ipv6_address, set_ulimit)
7272
from vllm.version import __version__ as VLLM_VERSION
7373

7474
TIMEOUT_KEEP_ALIVE = 5 # seconds
@@ -727,6 +727,10 @@ async def run_server(args, **uvicorn_kwargs) -> None:
727727
sock_addr = (args.host or "", args.port)
728728
sock = create_server_socket(sock_addr)
729729

730+
# workaround to avoid footguns where uvicorn drops requests with too
731+
# many concurrent requests active
732+
set_ulimit()
733+
730734
def signal_handler(*_) -> None:
731735
# Interrupt server on sigterm while initializing
732736
raise KeyboardInterrupt("terminated")

vllm/utils.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
import ipaddress
1313
import os
1414
import re
15+
import resource
1516
import signal
1617
import socket
1718
import subprocess
@@ -1818,3 +1819,20 @@ def memory_profiling(
18181819
result.non_torch_increase_in_bytes = current_cuda_memory_bytes - baseline_memory_in_bytes - weights_memory_in_bytes - diff.torch_memory_in_bytes # noqa
18191820
result.profile_time = diff.timestamp
18201821
result.non_kv_cache_memory_in_bytes = result.non_torch_increase_in_bytes + result.torch_peak_increase_in_bytes + result.weights_memory_in_bytes # noqa
1822+
1823+
1824+
# Adapted from: https://github.com/sgl-project/sglang/blob/f46f394f4d4dbe4aae85403dec006199b34d2840/python/sglang/srt/utils.py#L630 # noqa: E501
1825+
def set_ulimit(target_soft_limit=65535):
    """Best-effort raise of the soft open-file limit (RLIMIT_NOFILE).

    If the current soft limit is below ``target_soft_limit``, try to raise
    it to that value while leaving the hard limit untouched. Failure is
    never fatal: a warning is logged and the existing limit stays in place.

    Args:
        target_soft_limit: Desired minimum soft limit on open file
            descriptors for this process.
    """
    resource_type = resource.RLIMIT_NOFILE
    current_soft, current_hard = resource.getrlimit(resource_type)

    # An unlimited soft limit may be reported as a sentinel that compares
    # below any positive target (e.g. -1); never "raise" it to a finite value.
    if current_soft == resource.RLIM_INFINITY:
        return

    if current_soft < target_soft_limit:
        try:
            resource.setrlimit(resource_type,
                               (target_soft_limit, current_hard))
        except (ValueError, OSError) as e:
            # ValueError: the requested soft limit exceeds the hard limit.
            # OSError: the underlying setrlimit system call failed.
            logger.warning(
                "Found ulimit of %s and failed to automatically increase "
                "with error %s. This can cause fd limit errors like "
                "`OSError: [Errno 24] Too many open files`. Consider "
                "increasing with ulimit -n", current_soft, e)

0 commit comments

Comments
 (0)