Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Model] Implement merged input processor for LLaVA model #10676

Merged
merged 24 commits into from
Dec 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
7b6c4f1
Add `get_dummy_data` to `MultiModalProcessor`; fix and test `iter_pla…
DarkLight1337 Nov 26, 2024
de8332a
Use merged processor for llava model
DarkLight1337 Nov 26, 2024
8b6804e
format
DarkLight1337 Nov 26, 2024
26e3fdf
Fix typo
DarkLight1337 Nov 26, 2024
93d27bc
Enable the test to pass on V1
DarkLight1337 Nov 26, 2024
d697241
Handle embedding inputs
DarkLight1337 Nov 26, 2024
ca11cc9
format
DarkLight1337 Nov 26, 2024
c32cba9
Merge branch 'main' into llava-mm-processor
DarkLight1337 Nov 27, 2024
6c5c9ca
Fix wrong ndim
DarkLight1337 Nov 27, 2024
0194324
Factor out `merge_placeholders`
DarkLight1337 Nov 27, 2024
09618d0
Fix placeholder maps handling on V0
DarkLight1337 Nov 27, 2024
5501458
Remove unused dummy data code
DarkLight1337 Nov 27, 2024
f3673c7
Update dummy model
DarkLight1337 Nov 27, 2024
37bc008
Enable overriding hf processor and tokenizer; fix `_apply_prompt_repl…
DarkLight1337 Nov 27, 2024
4805a9e
Improve error handling in `_resolve_matches`; merge matches directly
DarkLight1337 Nov 27, 2024
8539008
Avoid hashing
DarkLight1337 Nov 27, 2024
00244c7
Update mapper tests
DarkLight1337 Nov 27, 2024
a00f541
Merge branch 'main' into llava-mm-processor
DarkLight1337 Dec 4, 2024
b31f8d4
Avoid calling input mapper in the first place
DarkLight1337 Dec 4, 2024
711cd38
Fix missing `multi_modal_kwargs` in dummy data
DarkLight1337 Dec 5, 2024
a11c6b2
Update dummy model
DarkLight1337 Dec 5, 2024
1d5a4d4
proper processing
ywang96 Dec 6, 2024
000736b
Patch pixtral processor
DarkLight1337 Dec 6, 2024
1485c05
Fix double counting of `mm_counts`
DarkLight1337 Dec 6, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 3 additions & 46 deletions tests/multimodal/test_mapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import numpy as np
import pytest
from transformers import CLIPImageProcessor, LlavaNextImageProcessor
from transformers import LlavaNextImageProcessor

from vllm.config import ModelConfig
from vllm.multimodal import MultiModalRegistry
Expand All @@ -14,49 +14,6 @@ def mm_registry():
return MultiModalRegistry()


@pytest.mark.parametrize("dtype", ["half", "float"])
@pytest.mark.parametrize("size_factor", [0.25, 0.5, 1.0])
def test_clip_image_processor(image_assets, mm_registry, dtype, size_factor):
    """Check that the registry's image mapping agrees with the HF CLIP processor.

    Each image asset is rescaled by ``size_factor`` and passed through both
    ``CLIPImageProcessor.preprocess`` and ``mm_registry.map_input``. The two
    results must expose the same keys, and for every key the arrays must
    have equal shapes and values that satisfy ``np.allclose``.
    """
    MODEL_NAME = "llava-hf/llava-1.5-7b-hf"

    hf_processor = CLIPImageProcessor.from_pretrained(MODEL_NAME)
    assert isinstance(hf_processor, CLIPImageProcessor)

    config_kwargs = dict(
        model=MODEL_NAME,
        task="auto",
        tokenizer=MODEL_NAME,
        tokenizer_mode="auto",
        trust_remote_code=False,
        seed=0,
        dtype=dtype,
        revision=None,
        limit_mm_per_prompt={"image": 1},
    )
    model_config = ModelConfig(**config_kwargs)

    # The per-prompt limits are initialised on the registry before any
    # input is mapped through it.
    mm_registry.init_mm_limits_per_prompt(model_config)

    # Lazy generator: each image is rescaled right before it is compared,
    # exactly as a rescale-inside-the-loop would do.
    rescaled_images = (rescale_image_size(image_asset.pil_image, size_factor)
                       for image_asset in image_assets)

    for image in rescaled_images:
        expected = hf_processor.preprocess(image, return_tensors="pt")
        actual = mm_registry.map_input(model_config, {"image": image})

        assert expected.keys() == actual.keys()

        for key, expected_tensor in expected.items():
            expected_arr: np.ndarray = expected_tensor.numpy()
            actual_arr: np.ndarray = actual[key].numpy()
            failure_msg = f"Failed for key={key}"

            assert expected_arr.shape == actual_arr.shape, failure_msg
            assert np.allclose(expected_arr, actual_arr), failure_msg


@pytest.mark.parametrize("dtype", ["half", "float"])
@pytest.mark.parametrize("size_factor", [0.25, 0.5, 1.0])
def test_llava_next_image_processor(image_assets, mm_registry, dtype,
Expand Down Expand Up @@ -107,7 +64,7 @@ def test_llava_next_image_processor(image_assets, mm_registry, dtype,
(2, 1, False), (2, 2, True)],
)
def test_mm_limits(image_assets, mm_registry, num_images, limit, is_valid):
MODEL_NAME = "llava-hf/llava-1.5-7b-hf"
MODEL_NAME = "llava-hf/llava-v1.6-mistral-7b-hf"

model_config = ModelConfig(
model=MODEL_NAME,
Expand Down Expand Up @@ -138,7 +95,7 @@ def test_mm_limits(image_assets, mm_registry, num_images, limit, is_valid):
# NOTE: We don't test zero images since the HF processor doesn't support it
@pytest.mark.parametrize("num_images", [1, 2])
def test_image_mapper_multi(image_assets, mm_registry, num_images):
MODEL_NAME = "llava-hf/llava-1.5-7b-hf"
MODEL_NAME = "llava-hf/llava-v1.6-mistral-7b-hf"

model_config = ModelConfig(
model=MODEL_NAME,
Expand Down
Loading
Loading