
Commit 99243cf
Mucked up the rebasing. Fixing that now.
These files should not be different from what's in main

Signed-off-by: Matthew Hendrey <[email protected]>
mhendrey committed Jan 23, 2025
1 parent 6867b37 commit 99243cf
Showing 2 changed files with 0 additions and 62 deletions.
2 changes: 0 additions & 2 deletions vllm/engine/llm_engine.py
@@ -690,10 +690,8 @@ def add_request(
arrival_time: The arrival time of the request. If None, we use
the current monotonic time.
lora_request: The LoRA request to add.
lora_request: The LoRA request to add.
trace_headers: OpenTelemetry trace headers.
prompt_adapter_request: The prompt adapter request to add.
prompt_adapter_request: The prompt adapter request to add.
priority: The priority of the request.
Only applicable with priority scheduling.
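Note: the docstring fragment above documents parameters of LLMEngine.add_request. The following is a minimal usage sketch, assuming vLLM's standard engine API; the model name, prompt, and request id are placeholders for illustration and are not part of this commit.

    import time

    from vllm import EngineArgs, LLMEngine, SamplingParams

    # Placeholder engine setup; any supported model name works here.
    engine = LLMEngine.from_engine_args(EngineArgs(model="facebook/opt-125m"))

    # Queue one request. Optional arguments (lora_request, trace_headers,
    # prompt_adapter_request, priority) correspond to the docstring entries above.
    engine.add_request(
        request_id="request-0",
        prompt="Hello, world!",
        params=SamplingParams(max_tokens=16),
        arrival_time=time.monotonic(),  # defaults to the current monotonic time if None
        priority=0,  # only used when priority scheduling is enabled
    )

    # Drive the engine until the request completes.
    while engine.has_unfinished_requests():
        for request_output in engine.step():
            if request_output.finished:
                print(request_output.outputs[0].text)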
60 changes: 0 additions & 60 deletions vllm/model_executor/models/aria.py
@@ -108,12 +108,6 @@ def __init__(
) -> None:
super().__init__()

self.linear_in = ColumnParallelLinear(in_features,
hidden_features,
bias=False)
self.linear_out = RowParallelLinear(hidden_features,
output_dim,
bias=False)
self.linear_in = ColumnParallelLinear(in_features,
hidden_features,
bias=False)
@@ -160,28 +154,16 @@ def __init__(self, config: AriaConfig) -> None:
self.hidden_features = config.text_config.hidden_size
self.output_dim = config.text_config.hidden_size

self.patch_to_query_dict = config.projector_patch_to_query_dict
self.in_features = config.vision_config.hidden_size
self.num_heads = config.vision_config.num_attention_heads
self.kv_dim = config.vision_config.hidden_size
self.hidden_features = config.text_config.hidden_size
self.output_dim = config.text_config.hidden_size

self.query = nn.Parameter(
torch.empty(config.max_value_projector_patch_to_query_dict,
self.in_features))

self.cross_attn = AriaCrossAttention(config)
self.cross_attn = AriaCrossAttention(config)

self.layer_norm = nn.LayerNorm(self.in_features)
self.feed_forward = AriaProjectorMLP(self.in_features,
self.hidden_features,
self.output_dim)
self.layer_norm = nn.LayerNorm(self.in_features)
self.feed_forward = AriaProjectorMLP(self.in_features,
self.hidden_features,
self.output_dim)

def forward(
self,
@@ -197,16 +179,6 @@ def forward(

query_num = self.patch_to_query_dict[num_patches]

queries = self.query[:query_num].unsqueeze(0).repeat(batch_size, 1, 1)
batch_size, num_patches = x.shape[0], x.shape[1]

if num_patches not in self.patch_to_query_dict:
raise KeyError(f"Number of patches {num_patches} not found in "
"patch_to_query_dict amongst possible values "
f"{self.patch_to_query_dict.keys()}.")

query_num = self.patch_to_query_dict[num_patches]

queries = self.query[:query_num].unsqueeze(0).repeat(batch_size, 1, 1)

if attn_mask is not None:
@@ -215,7 +187,6 @@ def forward(

attention_out = self.cross_attn(x, queries, attn_mask=attn_mask)

out = self.feed_forward(self.layer_norm(attention_out))
out = self.feed_forward(self.layer_norm(attention_out))

return out
@@ -285,7 +256,6 @@ def __init__(
self.shared_experts = LlamaMLP(
config.hidden_size,
config.intermediate_size * config.moe_num_shared_experts,
config.intermediate_size * config.moe_num_shared_experts,
"silu",
quant_config=quant_config,
bias=config.mlp_bias,
@@ -330,7 +300,6 @@ def __init__(
) -> None:
super().__init__(config, cache_config, quant_config, prefix)
self.mlp = AriaTextMoELayer(config, quant_config=quant_config)
self.mlp = AriaTextMoELayer(config, quant_config=quant_config)


class AriaTextModel(LlamaModel):
@@ -418,7 +387,6 @@ class AriaProcessingInfo(BaseProcessingInfo):

def get_hf_config(self):
return self.ctx.get_hf_config(AriaConfig)
return self.ctx.get_hf_config(AriaConfig)

def get_vision_config(self):
return self.get_hf_config().vision_config
@@ -601,22 +569,6 @@ def _create_patch_attention_mask(
)
return (patches_subgrid.sum(dim=(-1, -2)) > 0).bool()

def _create_patch_attention_mask(
self, pixel_mask: Optional[torch.Tensor]) -> torch.Tensor:
if pixel_mask is None:
return None

patches_subgrid = pixel_mask.unfold(
dimension=1,
size=self.vision_tower.config.patch_size,
step=self.vision_tower.config.patch_size,
).unfold(
dimension=2,
size=self.vision_tower.config.patch_size,
step=self.vision_tower.config.patch_size,
)
return (patches_subgrid.sum(dim=(-1, -2)) > 0).bool()

def _process_image_input(
self, image_input: AriaImagePixelInputs
) -> Tuple[torch.Tensor, torch.Tensor]:
@@ -637,18 +589,6 @@ def _process_image_input(
image_attn_mask = torch.logical_not(flattened_mask)

return self.multi_modal_projector(image_outputs, image_attn_mask)
patch_attention_mask = self._create_patch_attention_mask(pixel_mask)

image_outputs = self.vision_tower(
pixel_values=pixel_values,
patch_attention_mask=patch_attention_mask,
)
image_attn_mask = None
if patch_attention_mask is not None:
flattened_mask = patch_attention_mask.flatten(1)
image_attn_mask = torch.logical_not(flattened_mask)

return self.multi_modal_projector(image_outputs, image_attn_mask)

def get_multimodal_embeddings(self, **kwargs) -> Optional[NestedTensors]:
image_input = self._parse_and_validate_image_input(**kwargs)
