Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Error when separating opus files #14

Open
ElizavetaSedova opened this issue Aug 7, 2024 · 1 comment
Open

Error when separating opus files #14

ElizavetaSedova opened this issue Aug 7, 2024 · 1 comment

Comments

@ElizavetaSedova
Copy link

I tried to use hdemucs_mmi, UVR-MDX-NET-Inst_1, MDX23C
And an error appeared. I suspect this has something to do with the audio format, because I don't get this error with other formats or after converting the audio. But converting is not very convenient.

File /workspace/vocal_remover/ultimatevocalremover_api/src/models.py:150, in Demucs.__call__(self, audio, sampling_rate, **kwargs)
    148 def __call__(self, audio:Union[npt.NDArray, str], sampling_rate:int=None, **kwargs)->dict:
    149     if isinstance(audio, str):
--> 150         return self.predict_path(audio)
    151     return self.predict(audio, sampling_rate)

File /workspace/vocal_remover/ultimatevocalremover_api/src/models.py:146, in Demucs.predict_path(self, audio, **kwargs)
    144 audio, sampling_rate = read(audio)
    145 audio = torch.tensor(audio, dtype=torch.float32)
--> 146 return self.predict(audio, sampling_rate)

File /workspace/vocal_remover/ultimatevocalremover_api/src/models.py:128, in Demucs.predict(self, audio, sampling_rate, **kwargs)
    125 elif isinstance(audio, list): 
    126     audio = torch.tensor(audio, dtype=torch.float32)
--> 128 origin, separated = self.model_api.separate_tensor(audio, sampling_rate)
    129 return separated

File /workspace/vocal_remover/ultimatevocalremover_api/src/models_dir/demucs/demucs/api.py:270, in Separator.separate_tensor(self, wav, sr)
    268 wav -= ref.mean()
    269 wav /= ref.std() + 1e-8
--> 270 out = apply_model(
    271         self._model,
    272         wav[None],
    273         segment=self._segment,
    274         shifts=self._shifts,
    275         split=self._split,
    276         overlap=self._overlap,
    277         device=self._device,
    278         num_workers=self._jobs,
    279         callback=self._callback,
    280         callback_arg=_replace_dict(
    281             self._callback_arg, ("audio_length", wav.shape[1])
    282         ),
    283         progress=self._progress,
    284     )
    285 if out is None:
    286     raise KeyboardInterrupt

File /workspace/vocal_remover/ultimatevocalremover_api/src/models_dir/demucs/demucs/apply.py:216, in apply_model(model, mix, shifts, split, overlap, transition_power, progress, device, num_workers, segment, pool, lock, callback, callback_arg)
    213 original_model_device = next(iter(sub_model.parameters())).device
    214 sub_model.to(device)
--> 216 res = apply_model(sub_model, mix, **kwargs, callback_arg=callback_arg)
    217 out = res
    218 sub_model.to(original_model_device)

File /workspace/vocal_remover/ultimatevocalremover_api/src/models_dir/demucs/demucs/apply.py:251, in apply_model(model, mix, shifts, split, overlap, transition_power, progress, device, num_workers, segment, pool, lock, callback, callback_arg)
    246 shifted = TensorChunk(padded_mix, offset, length + max_shift - offset)
    247 kwargs["callback"] = (
    248         (lambda d, i=shift_idx: callback(_replace_dict(d, ("shift_idx", i)))
    249          if callback else None)
    250     )
--> 251 res = apply_model(model, shifted, **kwargs, callback_arg=callback_arg)
    252 shifted_out = res
    253 out += shifted_out[..., max_shift - offset:]

File /workspace/vocal_remover/ultimatevocalremover_api/src/models_dir/demucs/demucs/apply.py:290, in apply_model(model, mix, shifts, split, overlap, transition_power, progress, device, num_workers, segment, pool, lock, callback, callback_arg)
    288 for future, offset in futures:
    289     try:
--> 290         chunk_out = future.result()  # type: th.Tensor
    291     except Exception:
    292         pool.shutdown(wait=True, cancel_futures=True)

File /workspace/vocal_remover/ultimatevocalremover_api/src/models_dir/demucs/demucs/utils.py:132, in DummyPoolExecutor.DummyResult.result(self)
    130 def result(self):
    131     if self._dict["run"]:
--> 132         return self.func(*self.args, **self.kwargs)
    133     else:
    134         raise CancelledError()

File /workspace/vocal_remover/ultimatevocalremover_api/src/models_dir/demucs/demucs/apply.py:317, in apply_model(model, mix, shifts, split, overlap, transition_power, progress, device, num_workers, segment, pool, lock, callback, callback_arg)
    315         callback(_replace_dict(callback_arg, ("state", "start")))  # type: ignore
    316 with th.no_grad():
--> 317     out = model(padded_mix)
    318 with lock:
    319     if callback is not None:

File ~/.conda/envs/my_env/lib/python3.10/site-packages/torch/nn/modules/module.py:1501, in Module._call_impl(self, *args, **kwargs)
   1496 # If we don't have any hooks, we want to skip the rest of the logic in
   1497 # this function, and just call forward.
   1498 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1499         or _global_backward_pre_hooks or _global_backward_hooks
   1500         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1501     return forward_call(*args, **kwargs)
   1502 # Do not call functions when jit is used
   1503 full_backward_hooks, non_full_backward_hooks = [], []

File /workspace/vocal_remover/ultimatevocalremover_api/src/models_dir/demucs/demucs/hdemucs.py:693, in HDemucs.forward(self, mix)
    690 x = mix
    691 length = x.shape[-1]
--> 693 z = self._spec(mix)
    694 mag = self._magnitude(z).to(mix.device)
    695 x = mag

File /workspace/vocal_remover/ultimatevocalremover_api/src/models_dir/demucs/demucs/hdemucs.py:604, in HDemucs._spec(self, x)
    602 pad = hl // 2 * 3
    603 if not self.hybrid_old:
--> 604     x = pad1d(x, (pad, pad + le * hl - x.shape[-1]), mode='reflect')
    605 else:
    606     x = pad1d(x, (pad, pad + le * hl - x.shape[-1]))

File /workspace/vocal_remover/ultimatevocalremover_api/src/models_dir/demucs/demucs/hdemucs.py:39, in pad1d(x, paddings, mode, value)
     37 out = F.pad(x, paddings, mode, value)
     38 assert out.shape[-1] == length + padding_left + padding_right
---> 39 assert (out[..., padding_left: padding_left + length] == x0).all()
     40 return out
@ElizavetaSedova
Copy link
Author

I've added "opus" to the following line in fastio.py to solve this problem:

if ext in ['wav', 'flac', 'ogg', 'mp3', 'opus']:

It might be worth expanding this list to cover all extensions supported by the audiofile library.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant