Skip to content

Commit

Permalink
[2/N] API Server: Avoid ulimit footgun (#11530)
Browse files Browse the repository at this point in the history
  • Loading branch information
robertgshaw2-redhat authored Dec 26, 2024
1 parent 2072924 commit 55fb97f
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 2 deletions.
4 changes: 3 additions & 1 deletion vllm/entrypoints/api_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
from vllm.logger import init_logger
from vllm.sampling_params import SamplingParams
from vllm.usage.usage_lib import UsageContext
from vllm.utils import FlexibleArgumentParser, random_uuid
from vllm.utils import FlexibleArgumentParser, random_uuid, set_ulimit
from vllm.version import __version__ as VLLM_VERSION

logger = init_logger("vllm.entrypoints.api_server")
Expand Down Expand Up @@ -119,6 +119,8 @@ async def run_server(args: Namespace,
logger.info("vLLM API server version %s", VLLM_VERSION)
logger.info("args: %s", args)

set_ulimit()

app = await init_app(args, llm_engine)
assert engine is not None

Expand Down
6 changes: 5 additions & 1 deletion vllm/entrypoints/openai/api_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@
from vllm.logger import init_logger
from vllm.usage.usage_lib import UsageContext
from vllm.utils import (FlexibleArgumentParser, get_open_zmq_ipc_path,
is_valid_ipv6_address)
is_valid_ipv6_address, set_ulimit)
from vllm.version import __version__ as VLLM_VERSION

TIMEOUT_KEEP_ALIVE = 5 # seconds
Expand Down Expand Up @@ -727,6 +727,10 @@ async def run_server(args, **uvicorn_kwargs) -> None:
sock_addr = (args.host or "", args.port)
sock = create_server_socket(sock_addr)

# workaround to avoid footguns where uvicorn drops requests with too
# many concurrent requests active
set_ulimit()

def signal_handler(*_) -> None:
# Interrupt server on sigterm while initializing
raise KeyboardInterrupt("terminated")
Expand Down
18 changes: 18 additions & 0 deletions vllm/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import ipaddress
import os
import re
import resource
import signal
import socket
import subprocess
Expand Down Expand Up @@ -1818,3 +1819,20 @@ def memory_profiling(
result.non_torch_increase_in_bytes = current_cuda_memory_bytes - baseline_memory_in_bytes - weights_memory_in_bytes - diff.torch_memory_in_bytes # noqa
result.profile_time = diff.timestamp
result.non_kv_cache_memory_in_bytes = result.non_torch_increase_in_bytes + result.torch_peak_increase_in_bytes + result.weights_memory_in_bytes # noqa


# Adapted from: https://github.com/sgl-project/sglang/blob/f46f394f4d4dbe4aae85403dec006199b34d2840/python/sglang/srt/utils.py#L630 # noqa: E501Curre
def set_ulimit(target_soft_limit=65535):
resource_type = resource.RLIMIT_NOFILE
current_soft, current_hard = resource.getrlimit(resource_type)

if current_soft < target_soft_limit:
try:
resource.setrlimit(resource_type,
(target_soft_limit, current_hard))
except ValueError as e:
logger.warning(
"Found ulimit of %s and failed to automatically increase"
"with error %s. This can cause fd limit errors like"
"`OSError: [Errno 24] Too many open files`. Consider "
"increasing with ulimit -n", current_soft, e)

0 comments on commit 55fb97f

Please sign in to comment.