Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Tensors of the same index must be on the same device and the same dtype except step tensors that can be CPU and float32 notwithstanding #315

Open
wurevvc opened this issue Mar 18, 2024 · 2 comments

Comments

@wurevvc
Copy link

wurevvc commented Mar 18, 2024

While I was training, the following error occurred.

@NitzanHod
Copy link

Please reply if you managed to solve this issue!

The error is raised in optimizer.step() when running the fine-tuning script as described in the README, on a freshly cloned repository. I suspect this is caused by changes in newer PyTorch versions; see pytorch/pytorch#127197.
I get this error with torch==2.5.1.
I attach the stack trace below:

{'loss': 1.0453, 'grad_norm': 1.2838969230651855, 'learning_rate': 1.5557084630007206e-05, 'epoch': 1.0}
33%|███▎ | 406/1218 [24:06<48:52, 3.61s/it]
(Note: three worker processes raised this traceback simultaneously and their output was interleaved line-by-line; the duplicates have been merged into the single clean copy below.)
Traceback (most recent call last):
File "/home/h/stanford_alpaca/train.py", line 222, in
train()
File "/home/h/stanford_alpaca/train.py", line 216, in train
trainer.train()
File "/home/h/miniconda3/lib/python3.9/site-packages/transformers/trainer.py", line 2123, in train
return inner_training_loop(
File "/home/h/miniconda3/lib/python3.9/site-packages/transformers/trainer.py", line 2534, in _inner_training_loop
self.optimizer.step()
File "/home/h/miniconda3/lib/python3.9/site-packages/accelerate/optimizer.py", line 149, in step
self.optimizer.step(closure)
File "/home/h/miniconda3/lib/python3.9/site-packages/torch/optim/lr_scheduler.py", line 75, in wrapper
return wrapped(*args, **kwargs)
File "/home/h/miniconda3/lib/python3.9/site-packages/torch/optim/optimizer.py", line 385, in wrapper
out = func(*args, **kwargs)
File "/home/h/miniconda3/lib/python3.9/site-packages/torch/optim/optimizer.py", line 76, in _use_grad
ret = func(self, *args, **kwargs)
File "/home/h/miniconda3/lib/python3.9/site-packages/torch/optim/adamw.py", line 187, in step
adamw(
File "/home/h/miniconda3/lib/python3.9/site-packages/torch/optim/adamw.py", line 339, in adamw
func(
File "/home/h/miniconda3/lib/python3.9/site-packages/torch/optim/adamw.py", line 516, in _multi_tensor_adamw
grouped_tensors = Optimizer._group_tensors_by_device_and_dtype([
File "/home/h/miniconda3/lib/python3.9/site-packages/torch/optim/optimizer.py", line 409, in _group_tensors_by_device_and_dtype
return _group_tensors_by_device_and_dtype(tensorlistlist, with_indices)
File "/home/h/miniconda3/lib/python3.9/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
return func(*args, **kwargs)
File "/home/h/miniconda3/lib/python3.9/site-packages/torch/utils/_foreach_utils.py", line 38, in _group_tensors_by_device_and_dtype
torch._C._group_tensors_by_device_and_dtype(tensorlistlist, with_indices).items()
RuntimeError: Tensors of the same index must be on the same device and the same dtype except step tensors that can be CPU and float32 notwithstanding

@NitzanHod
Copy link

For me, the issue was solved by downgrading the transformers package to transformers==4.28.1 (while keeping torch==2.5.1).

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

2 participants