
Commit 99243cf
Mucked up the rebasing. Fixing that now.
These files should not be different from what's in main

Signed-off-by: Matthew Hendrey <[email protected]>
mhendrey committed Jan 23, 2025
1 parent 6867b37 commit 99243cf
Showing 2 changed files with 0 additions and 62 deletions.
2 changes: 0 additions & 2 deletions vllm/engine/llm_engine.py
@@ -690,10 +690,8 @@ def add_request(
arrival_time: The arrival time of the request. If None, we use
the current monotonic time.
lora_request: The LoRA request to add.
lora_request: The LoRA request to add.
trace_headers: OpenTelemetry trace headers.
prompt_adapter_request: The prompt adapter request to add.
prompt_adapter_request: The prompt adapter request to add.
priority: The priority of the request.
Only applicable with priority scheduling.
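Note: the docstring fragment above documents parameters of LLMEngine.add_request. The following is a minimal usage sketch, assuming vLLM's standard engine API; the model name, prompt, and request id are placeholders for illustration and are not part of this commit.

    import time

    from vllm import EngineArgs, LLMEngine, SamplingParams

    # Placeholder engine setup; any supported model name works here.
    engine = LLMEngine.from_engine_args(EngineArgs(model="facebook/opt-125m"))

    # Queue one request. Optional arguments (lora_request, trace_headers,
    # prompt_adapter_request, priority) correspond to the docstring entries above.
    engine.add_request(
        request_id="request-0",
        prompt="Hello, world!",
        params=SamplingParams(max_tokens=16),
        arrival_time=time.monotonic(),  # defaults to the current monotonic time if None
        priority=0,  # only used when priority scheduling is enabled
    )

    # Drive the engine until the request completes.
    while engine.has_unfinished_requests():
        for request_output in engine.step():
            if request_output.finished:
                print(request_output.outputs[0].text)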
60 changes: 0 additions & 60 deletions vllm/model_executor/models/aria.py
@@ -108,12 +108,6 @@ def __init__(
) -> None:
super().__init__()

self.linear_in = ColumnParallelLinear(in_features,
hidden_features,
bias=False)
self.linear_out = RowParallelLinear(hidden_features,
output_dim,
bias=False)
self.linear_in = ColumnParallelLinear(in_features,
hidden_features,
bias=False)
@@ -160,28 +154,16 @@ def __init__(self, config: AriaConfig) -> None:
self.hidden_features = config.text_config.hidden_size
self.output_dim = config.text_config.hidden_size

self.patch_to_query_dict = config.projector_patch_to_query_dict
self.in_features = config.vision_config.hidden_size
self.num_heads = config.vision_config.num_attention_heads
self.kv_dim = config.vision_config.hidden_size
self.hidden_features = config.text_config.hidden_size
self.output_dim = config.text_config.hidden_size

self.query = nn.Parameter(
torch.empty(config.max_value_projector_patch_to_query_dict,
self.in_features))

self.cross_attn = AriaCrossAttention(config)
self.cross_attn = AriaCrossAttention(config)

self.layer_norm = nn.LayerNorm(self.in_features)
self.feed_forward = AriaProjectorMLP(self.in_features,
self.hidden_features,
self.output_dim)
self.layer_norm = nn.LayerNorm(self.in_features)
self.feed_forward = AriaProjectorMLP(self.in_features,
self.hidden_features,
self.output_dim)

def forward(
self,
@@ -197,16 +179,6 @@ def forward(

query_num = self.patch_to_query_dict[num_patches]

queries = self.query[:query_num].unsqueeze(0).repeat(batch_size, 1, 1)
batch_size, num_patches = x.shape[0], x.shape[1]

if num_patches not in self.patch_to_query_dict:
raise KeyError(f"Number of patches {num_patches} not found in "
"patch_to_query_dict amongst possible values "
f"{self.patch_to_query_dict.keys()}.")

query_num = self.patch_to_query_dict[num_patches]

queries = self.query[:query_num].unsqueeze(0).repeat(batch_size, 1, 1)

if attn_mask is not None:
@@ -215,7 +187,6 @@ def forward(

attention_out = self.cross_attn(x, queries, attn_mask=attn_mask)

out = self.feed_forward(self.layer_norm(attention_out))
out = self.feed_forward(self.layer_norm(attention_out))

return out
@@ -285,7 +256,6 @@ def __init__(
self.shared_experts = LlamaMLP(
config.hidden_size,
config.intermediate_size * config.moe_num_shared_experts,
config.intermediate_size * config.moe_num_shared_experts,
"silu",
quant_config=quant_config,
bias=config.mlp_bias,
@@ -330,7 +300,6 @@ def __init__(
) -> None:
super().__init__(config, cache_config, quant_config, prefix)
self.mlp = AriaTextMoELayer(config, quant_config=quant_config)
self.mlp = AriaTextMoELayer(config, quant_config=quant_config)


class AriaTextModel(LlamaModel):
@@ -418,7 +387,6 @@ class AriaProcessingInfo(BaseProcessingInfo):

def get_hf_config(self):
return self.ctx.get_hf_config(AriaConfig)
return self.ctx.get_hf_config(AriaConfig)

def get_vision_config(self):
return self.get_hf_config().vision_config
@@ -601,22 +569,6 @@ def _create_patch_attention_mask(
)
return (patches_subgrid.sum(dim=(-1, -2)) > 0).bool()

def _create_patch_attention_mask(
self, pixel_mask: Optional[torch.Tensor]) -> torch.Tensor:
if pixel_mask is None:
return None

patches_subgrid = pixel_mask.unfold(
dimension=1,
size=self.vision_tower.config.patch_size,
step=self.vision_tower.config.patch_size,
).unfold(
dimension=2,
size=self.vision_tower.config.patch_size,
step=self.vision_tower.config.patch_size,
)
return (patches_subgrid.sum(dim=(-1, -2)) > 0).bool()

def _process_image_input(
self, image_input: AriaImagePixelInputs
) -> Tuple[torch.Tensor, torch.Tensor]:
@@ -637,18 +589,6 @@ def _process_image_input(
image_attn_mask = torch.logical_not(flattened_mask)

return self.multi_modal_projector(image_outputs, image_attn_mask)
patch_attention_mask = self._create_patch_attention_mask(pixel_mask)

image_outputs = self.vision_tower(
pixel_values=pixel_values,
patch_attention_mask=patch_attention_mask,
)
image_attn_mask = None
if patch_attention_mask is not None:
flattened_mask = patch_attention_mask.flatten(1)
image_attn_mask = torch.logical_not(flattened_mask)

return self.multi_modal_projector(image_outputs, image_attn_mask)

def get_multimodal_embeddings(self, **kwargs) -> Optional[NestedTensors]:
image_input = self._parse_and_validate_image_input(**kwargs)
