diff --git a/vllm/model_executor/layers/activation.py b/vllm/model_executor/layers/activation.py index b8a302cf5087f..32456fee06a28 100644 --- a/vllm/model_executor/layers/activation.py +++ b/vllm/model_executor/layers/activation.py @@ -64,8 +64,8 @@ def __init__(self): if current_platform.is_cuda_alike() or current_platform.is_cpu(): self.op = torch.ops._C.silu_and_mul elif current_platform.is_xpu(): - import intel_extension_for_pytorch as ipex - self.op = ipex.llm.functional.silu_and_mul + from vllm._ipex_ops import ipex_ops + self.op = ipex_ops.silu_and_mul def forward_native(self, x: torch.Tensor) -> torch.Tensor: """PyTorch-native implementation equivalent to forward().""" diff --git a/vllm/plugins/__init__.py b/vllm/plugins/__init__.py index c50eb2cef4cd5..e5fa4f0e4a2f6 100644 --- a/vllm/plugins/__init__.py +++ b/vllm/plugins/__init__.py @@ -63,8 +63,8 @@ def load_general_plugins(): from vllm.platforms import current_platform if current_platform.is_xpu(): - # see https://github.com/pytorch/pytorch/blob/8cada5cbe5450e17c26fb8b358116785324537b2/torch/_dynamo/config.py#L158 # noqa - os.environ['TORCH_COMPILE_DISABLE'] = 'True' + # see https://github.com/pytorch/pytorch/blob/43c5f59/torch/_dynamo/config.py#L158 + torch._dynamo.config.disable = True if current_platform.is_hpu(): # NOTE(kzawora): PT HPU lazy backend (PT_HPU_LAZY_MODE = 1) # does not support torch.compile @@ -72,7 +72,6 @@ def load_general_plugins(): # torch.compile support is_lazy = os.environ.get('PT_HPU_LAZY_MODE', '1') == '1' if is_lazy: - # see https://github.com/pytorch/pytorch/blob/43c5f59/torch/_dynamo/config.py#L158 torch._dynamo.config.disable = True # NOTE(kzawora) multi-HPU inference with HPUGraphs (lazy-only) # requires enabling lazy collectives