[Misc] Rename MultiModalInputsV2 -> MultiModalInputs (#12244)
Signed-off-by: DarkLight1337 <[email protected]>
DarkLight1337 authored Jan 21, 2025
1 parent 2fc6944 commit 9691255
Showing 12 changed files with 31 additions and 31 deletions.
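This commit is a mechanical rename: the `MultiModalInputsV2` TypedDict in `vllm.multimodal.inputs` becomes `MultiModalInputs`, and every import, annotation, and constructor call in the files below is updated to match. Downstream code that referenced the old name needs the same one-line change. A minimal sketch of updated usage (the `count_mm_hashes` helper is hypothetical; only the import path and the `type` / `mm_hashes` keys come from this diff):

```python
# Hypothetical downstream helper illustrating the rename; before this commit the
# same code would have imported and annotated MultiModalInputsV2 instead.
from vllm.multimodal.inputs import MultiModalInputs


def count_mm_hashes(inputs: MultiModalInputs) -> int:
    """Return how many multi-modal hashes a processed prompt carries."""
    # MultiModalInputs is a TypedDict whose "type" key is "multimodal";
    # "mm_hashes" may be absent, so fall back to an empty list (this mirrors
    # the multi_modal_hashes accessor touched in vllm/inputs/data.py).
    assert inputs["type"] == "multimodal"
    return len(inputs.get("mm_hashes", []))
```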
2 changes: 1 addition & 1 deletion docs/source/api/multimodal/inputs.md
@@ -43,7 +43,7 @@
```

```{eval-rst}
-.. autoclass:: vllm.multimodal.inputs.MultiModalInputsV2
+.. autoclass:: vllm.multimodal.inputs.MultiModalInputs
:members:
:show-inheritance:
```
12 changes: 6 additions & 6 deletions vllm/inputs/data.py
@@ -9,7 +9,7 @@
if TYPE_CHECKING:
from vllm.multimodal import (MultiModalDataDict, MultiModalKwargs,
MultiModalPlaceholderDict)
-from vllm.multimodal.inputs import MultiModalInputsV2
+from vllm.multimodal.inputs import MultiModalInputs


class TextPrompt(TypedDict):
@@ -207,7 +207,7 @@ def token_inputs(
return inputs


-DecoderOnlyInputs = Union[TokenInputs, "MultiModalInputsV2"]
+DecoderOnlyInputs = Union[TokenInputs, "MultiModalInputs"]
"""
The inputs in :class:`~vllm.LLMEngine` before they are
passed to the model executor.
@@ -222,14 +222,14 @@ class EncoderDecoderInputs(TypedDict):
This specifies the required data for encoder-decoder models.
"""
-encoder: Union[TokenInputs, "MultiModalInputsV2"]
+encoder: Union[TokenInputs, "MultiModalInputs"]
"""The inputs for the encoder portion."""

-decoder: Union[TokenInputs, "MultiModalInputsV2"]
+decoder: Union[TokenInputs, "MultiModalInputs"]
"""The inputs for the decoder portion."""


-SingletonInputs = Union[TokenInputs, "MultiModalInputsV2"]
+SingletonInputs = Union[TokenInputs, "MultiModalInputs"]
"""
A processed :class:`SingletonPrompt` which can be passed to
:class:`vllm.sequence.Sequence`.
@@ -311,7 +311,7 @@ def multi_modal_hashes(self) -> List[str]:
return inputs.get("multi_modal_hashes", [])

if inputs["type"] == "multimodal":
-# only the case when we use MultiModalInputsV2
+# only the case when we use MultiModalInputs
return inputs.get("mm_hashes", []) # type: ignore[return-value]

assert_never(inputs) # type: ignore[arg-type]
6 changes: 3 additions & 3 deletions vllm/inputs/preprocess.py
@@ -7,7 +7,7 @@
from vllm.logger import init_logger
from vllm.lora.request import LoRARequest
from vllm.multimodal import MULTIMODAL_REGISTRY, MultiModalRegistry
-from vllm.multimodal.inputs import MultiModalDataDict, MultiModalInputsV2
+from vllm.multimodal.inputs import MultiModalDataDict, MultiModalInputs
from vllm.prompt_adapter.request import PromptAdapterRequest
from vllm.transformers_utils.tokenizer_group import BaseTokenizerGroup

@@ -247,7 +247,7 @@ def _process_multimodal(
mm_data: MultiModalDataDict,
mm_processor_kwargs: Optional[Mapping[str, object]],
lora_request: Optional[LoRARequest],
-) -> MultiModalInputsV2:
+) -> MultiModalInputs:
"""
Apply the model's multi-modal processor to a multi-modal prompt,
returning the corresponding token IDs and metadata.
@@ -271,7 +271,7 @@ async def _process_multimodal_async(
mm_data: MultiModalDataDict,
mm_processor_kwargs: Optional[Mapping[str, object]],
lora_request: Optional[LoRARequest],
-) -> MultiModalInputsV2:
+) -> MultiModalInputs:
"""Async version of :meth:`_process_multimodal`."""
tokenizer_group = self.get_tokenizer_group()
tokenizer = await tokenizer_group.get_lora_tokenizer_async(lora_request
4 changes: 2 additions & 2 deletions vllm/model_executor/models/blip2.py
@@ -15,7 +15,7 @@
from vllm.model_executor.sampling_metadata import SamplingMetadata
from vllm.multimodal import MULTIMODAL_REGISTRY
from vllm.multimodal.inputs import (MultiModalDataDict, MultiModalFieldConfig,
-MultiModalInputsV2, MultiModalKwargs,
+MultiModalInputs, MultiModalKwargs,
NestedTensors, PlaceholderRange)
from vllm.multimodal.parse import MultiModalDataItems
from vllm.multimodal.processing import (BaseMultiModalProcessor,
@@ -490,7 +490,7 @@ def apply(
prompt: Union[str, list[int]],
mm_data: MultiModalDataDict,
hf_processor_mm_kwargs: Mapping[str, object],
-) -> MultiModalInputsV2:
+) -> MultiModalInputs:
result = super().apply(prompt, mm_data, hf_processor_mm_kwargs)

# Only <image> tokens should be considered as placeholders,
4 changes: 2 additions & 2 deletions vllm/model_executor/models/chameleon.py
@@ -29,7 +29,7 @@
from vllm.model_executor.utils import set_weight_attrs
from vllm.multimodal import MULTIMODAL_REGISTRY
from vllm.multimodal.inputs import (MultiModalDataDict, MultiModalFieldConfig,
-MultiModalInputsV2, MultiModalKwargs,
+MultiModalInputs, MultiModalKwargs,
NestedTensors, PlaceholderRange)
from vllm.multimodal.parse import MultiModalDataItems
from vllm.multimodal.processing import (BaseMultiModalProcessor,
@@ -159,7 +159,7 @@ def apply(
prompt: Union[str, list[int]],
mm_data: MultiModalDataDict,
hf_processor_mm_kwargs: Mapping[str, object],
-) -> MultiModalInputsV2:
+) -> MultiModalInputs:
result = super().apply(prompt, mm_data, hf_processor_mm_kwargs)

# Only <image> tokens should be considered as placeholders,
4 changes: 2 additions & 2 deletions vllm/model_executor/models/fuyu.py
@@ -31,7 +31,7 @@
from vllm.model_executor.sampling_metadata import SamplingMetadata
from vllm.multimodal import MULTIMODAL_REGISTRY
from vllm.multimodal.inputs import (MultiModalDataDict, MultiModalFieldConfig,
-MultiModalInputsV2, MultiModalKwargs,
+MultiModalInputs, MultiModalKwargs,
NestedTensors, PlaceholderRange)
from vllm.multimodal.parse import (ImageProcessorItems, ImageSize,
MultiModalDataItems)
@@ -232,7 +232,7 @@ def apply(
prompt: Union[str, list[int]],
mm_data: MultiModalDataDict,
hf_processor_mm_kwargs: Mapping[str, object],
-) -> MultiModalInputsV2:
+) -> MultiModalInputs:
result = super().apply(prompt, mm_data, hf_processor_mm_kwargs)

# Only |SPEAKER| (image) tokens should be considered as placeholders,
6 changes: 3 additions & 3 deletions vllm/model_executor/models/llava.py
@@ -24,7 +24,7 @@
from vllm.model_executor.sampling_metadata import SamplingMetadata
from vllm.multimodal import MULTIMODAL_REGISTRY
from vllm.multimodal.inputs import (MultiModalDataDict, MultiModalFieldConfig,
-MultiModalInputsV2, MultiModalKwargs,
+MultiModalInputs, MultiModalKwargs,
NestedTensors)
from vllm.multimodal.parse import (ImageEmbeddingItems, ImageProcessorItems,
ImageSize, MultiModalDataItems)
@@ -746,7 +746,7 @@ def apply(
prompt: Union[str, list[int]],
mm_data: MultiModalDataDict,
hf_processor_mm_kwargs: Mapping[str, object],
-) -> MultiModalInputsV2:
+) -> MultiModalInputs:
hf_config = self.info.get_hf_config()
image_token_id = hf_config.image_token_index

@@ -805,7 +805,7 @@ def get_replacement_mantis(item_idx: int):
for modality, placeholders in mm_placeholders.items()
}

-return MultiModalInputsV2(
+return MultiModalInputs(
type="multimodal",
prompt=prompt,
prompt_token_ids=prompt_ids,
4 changes: 2 additions & 2 deletions vllm/model_executor/models/phi3v.py
@@ -31,7 +31,7 @@
from vllm.model_executor.sampling_metadata import SamplingMetadata
from vllm.multimodal import MULTIMODAL_REGISTRY
from vllm.multimodal.inputs import (MultiModalDataDict, MultiModalFieldConfig,
-MultiModalInputsV2, MultiModalKwargs,
+MultiModalInputs, MultiModalKwargs,
NestedTensors, PlaceholderRange)
from vllm.multimodal.parse import (ImageEmbeddingItems, ImageProcessorItems,
ImageSize, MultiModalDataItems)
@@ -484,7 +484,7 @@ def apply(
prompt: Union[str, list[int]],
mm_data: MultiModalDataDict,
hf_processor_mm_kwargs: Mapping[str, object],
-) -> MultiModalInputsV2:
+) -> MultiModalInputs:
result = super().apply(prompt, mm_data, hf_processor_mm_kwargs)

# Only <|image|> tokens should be considered as placeholders,
4 changes: 2 additions & 2 deletions vllm/model_executor/models/qwen2_audio.py
@@ -37,7 +37,7 @@
from vllm.model_executor.sampling_metadata import SamplingMetadata
from vllm.multimodal import MULTIMODAL_REGISTRY
from vllm.multimodal.inputs import (MultiModalDataDict, MultiModalFieldConfig,
-MultiModalInputsV2, MultiModalKwargs,
+MultiModalInputs, MultiModalKwargs,
NestedTensors, PlaceholderRange)
from vllm.multimodal.parse import (AudioProcessorItems, MultiModalDataItems,
MultiModalDataParser)
@@ -245,7 +245,7 @@ def apply(
prompt: Union[str, list[int]],
mm_data: MultiModalDataDict,
hf_processor_mm_kwargs: Mapping[str, object],
-) -> MultiModalInputsV2:
+) -> MultiModalInputs:
result = super().apply(prompt, mm_data, hf_processor_mm_kwargs)

# Only <|AUDIO|> tokens should be considered as placeholders,
2 changes: 1 addition & 1 deletion vllm/multimodal/inputs.py
@@ -491,7 +491,7 @@ def get_items(self, modality: str) -> Sequence[MultiModalKwargsItem]:
"""


-class MultiModalInputsV2(TypedDict):
+class MultiModalInputs(TypedDict):
"""
Represents the outputs of
:class:`vllm.multimodal.processing.BaseMultiModalProcessor`,
10 changes: 5 additions & 5 deletions vllm/multimodal/processing.py
@@ -18,8 +18,8 @@

from .hasher import MultiModalHasher
from .inputs import (MultiModalDataDict, MultiModalFieldConfig,
-MultiModalInputsV2, MultiModalKwargs,
-MultiModalKwargsItem, PlaceholderRange)
+MultiModalInputs, MultiModalKwargs, MultiModalKwargsItem,
+PlaceholderRange)
from .parse import MultiModalDataItems, MultiModalDataParser

if TYPE_CHECKING:
@@ -609,7 +609,7 @@ def __call__(
prompt: str,
mm_data: MultiModalDataDict,
hf_processor_mm_kwargs: Mapping[str, object],
-) -> MultiModalInputsV2:
+) -> MultiModalInputs:
return self.apply(prompt, mm_data, hf_processor_mm_kwargs)

def _get_data_parser(self) -> MultiModalDataParser:
@@ -1067,7 +1067,7 @@ def apply(
prompt: Union[str, list[int]],
mm_data: MultiModalDataDict,
hf_processor_mm_kwargs: Mapping[str, object],
-) -> MultiModalInputsV2:
+) -> MultiModalInputs:
"""
Process multi-modal inputs to be used in vLLM.
@@ -1169,7 +1169,7 @@ def apply(
for modality, placeholders in mm_placeholders.items()
}

-return MultiModalInputsV2(
+return MultiModalInputs(
type="multimodal",
prompt=prompt,
prompt_token_ids=prompt_ids,
4 changes: 2 additions & 2 deletions vllm/multimodal/profiling.py
@@ -11,7 +11,7 @@
from vllm.inputs import DummyData
from vllm.logger import init_logger

-from .inputs import MultiModalDataDict, MultiModalInputsV2
+from .inputs import MultiModalDataDict, MultiModalInputs
from .processing import BaseMultiModalProcessor, BaseProcessingInfo

logger = init_logger(__name__)
@@ -131,7 +131,7 @@ def _get_dummy_mm_inputs(
self,
seq_len: int,
mm_counts: Mapping[str, int],
-) -> MultiModalInputsV2:
+) -> MultiModalInputs:
factory = self.dummy_inputs
processor_inputs = factory.get_dummy_processor_inputs(
seq_len, mm_counts)
