-
-
Notifications
You must be signed in to change notification settings - Fork 5.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[Frontend][V1] Online serving performance improvements #12287
Changes from 4 commits
55dd119
0e92b61
aa7f031
e6fc61f
eafe7cb
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,7 +3,7 @@ | |
import re | ||
import time | ||
from argparse import Namespace | ||
from typing import Any, Dict, List, Literal, Optional, Union | ||
from typing import Any, ClassVar, Dict, List, Literal, Optional, Set, Union | ||
|
||
import torch | ||
from pydantic import BaseModel, ConfigDict, Field, model_validator | ||
|
@@ -42,23 +42,31 @@ class OpenAIBaseModel(BaseModel): | |
# OpenAI API does allow extra fields | ||
model_config = ConfigDict(extra="allow") | ||
|
||
# Cache class field names | ||
field_names: ClassVar[Optional[Set[str]]] = None | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There was noticeable overhead creating this set every time one of these objects is instantiated. |
||
|
||
@model_validator(mode="before") | ||
@classmethod | ||
def __log_extra_fields__(cls, data): | ||
if isinstance(data, dict): | ||
|
||
field_names = cls.field_names | ||
if field_names is None: | ||
if not isinstance(data, dict): | ||
return data | ||
# Get all class field names and their potential aliases | ||
field_names = set() | ||
for field_name, field in cls.model_fields.items(): | ||
field_names.add(field_name) | ||
if hasattr(field, 'alias') and field.alias: | ||
field_names.add(field.alias) | ||
|
||
# Compare against both field names and aliases | ||
extra_fields = data.keys() - field_names | ||
if extra_fields: | ||
logger.warning( | ||
"The following fields were present in the request " | ||
"but ignored: %s", extra_fields) | ||
if alias := getattr(field, 'alias', None): | ||
field_names.add(alias) | ||
cls.field_names = field_names | ||
|
||
# Compare against both field names and aliases | ||
if any(k not in field_names for k in data): | ||
logger.warning( | ||
"The following fields were present in the request " | ||
"but ignored: %s", | ||
data.keys() - field_names) | ||
return data | ||
|
||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -64,6 +64,12 @@ def __init__( | |
# recomputing. | ||
self._kv_block_hashes: List[BlockHashType] = [] | ||
|
||
# Read-only views | ||
# Prevent directly appending to the these lists since | ||
# they should also be updated simultaneously. | ||
self.output_token_ids = ConstantList(self._output_token_ids) | ||
self.all_token_ids = ConstantList(self._all_token_ids) | ||
|
||
@classmethod | ||
def from_engine_core_request(cls, request: EngineCoreRequest) -> "Request": | ||
return cls( | ||
|
@@ -79,18 +85,6 @@ def from_engine_core_request(cls, request: EngineCoreRequest) -> "Request": | |
lora_request=request.lora_request, | ||
) | ||
|
||
@property | ||
def output_token_ids(self) -> ConstantList[int]: | ||
# Prevent directly appending to the output_token_ids since | ||
# all_token_ids should also be updated simultaneously. | ||
return ConstantList(self._output_token_ids) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Avoid constructing these objects every time the properties are accessed. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nice catch! There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I actually thought properties were cached after the first call, nice call There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
That would involve the use of |
||
|
||
@property | ||
def all_token_ids(self) -> ConstantList[int]: | ||
# Prevent directly appending to the all_token_ids since | ||
# output_token_ids should also be updated simultaneously | ||
return ConstantList(self._all_token_ids) | ||
|
||
def append_output_token_ids( | ||
self, | ||
token_ids: Union[int, List[int]], | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do we need to call unfreeze at some point?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No, this is mostly static stuff that will be around for the lifetime of the process anyhow.
https://www.rippling.com/blog/the-garbage-collector-fights-back