Are you running this directly on an M3 chip? We haven't tested on that hardware.
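If it helps, here is a quick, illustrative way to check which compute backends PyTorch actually reports on that machine (a sketch, not part of the original setup):

```python
# Illustrative check of available PyTorch backends on an Apple-silicon Mac.
import torch

print(torch.backends.mps.is_available())  # True if the Metal (MPS) backend can be used
print(torch.backends.mps.is_built())      # True if this PyTorch build includes MPS support
print(torch.cuda.is_available())          # expected to be False on a MacBook
```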
System Info / 系統信息
We used GLM-4's finetune_demo to fine-tune the glm-4-9b-chat model. Our machine is a MacBook with an Apple M3, which is not very powerful and has no NVIDIA GPU, so we reduced the training-step and checkpoint-saving parameters in configs/lora.yaml as follows:
save_steps: 10   # original value: 500
max_steps: 30    # original value: 3000
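For context, a rough sketch of how these two values would map onto the Hugging Face training arguments that finetune.py presumably builds from the training_args section of lora.yaml (only save_steps and max_steps reflect our edit; the other fields here are placeholders):

```python
# Hypothetical mapping of the edited lora.yaml values onto Seq2SeqTrainingArguments.
from transformers import Seq2SeqTrainingArguments

training_args = Seq2SeqTrainingArguments(
    output_dir="out",
    max_steps=30,                    # reduced from 3000 so the run finishes on a CPU-only machine
    save_steps=10,                   # reduced from 500, giving checkpoints at steps 10, 20 and 30
    per_device_train_batch_size=1,   # placeholder value
    learning_rate=5e-4,              # placeholder value
)
```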
We then started fine-tuning with the command from the example:
python finetune.py data/AdvertiseGen/ THUDM/glm-4-9b-chat configs/lora.yaml
Fine-tuning produced three checkpoints.
We then tried to run the fine-tuned model:
python inference.py out/checkpoint-30
At this point the following error was raised:
KeyError: 'base_model.model.transformer.encoder.layers.6.input_layernorm.weight'
The full console output was:
'NoneType' object has no attribute 'cadam32bit_grad_fp32'
Some parameters are on the meta device because they were offloaded to the disk.
╭────────────────────────────────────────────────────────── Traceback (most recent call last) ───────────────────────────────────────────────────────────╮
│ /Users/itstamen/Documents/workspace/ai/GLM-4/finetune_demo/inference.py:133 in main │
│ │
│ 130 │ │ "repetition_penalty": 1.2, │
│ 131 │ │ "eos_token_id": model.config.eos_token_id, │
│ 132 │ } │
│ ❱ 133 │ outputs = model.generate(**inputs, **generate_kwargs) │
│ 134 │ response = tokenizer.decode( │
│ 135 │ │ outputs[0][len(inputs["input_ids"][0]) :], skip_special_tokens=True │
│ 136 │ ).strip() │
│ │
│ /Users/itstamen/Documents/workspace/ai/GLM-4/.venv/lib/python3.10/site-packages/peft/peft_model.py:1838 in generate │
│ │
│ 1835 │ │ │ if not peft_config.is_prompt_learning: │
│ 1836 │ │ │ │ with self._enable_peft_forward_hooks(*args, **kwargs): │
│ 1837 │ │ │ │ │ kwargs = {k: v for k, v in kwargs.items() if k not in self.special_p │
│ ❱ 1838 │ │ │ │ │ outputs = self.base_model.generate(*args, **kwargs) │
│ 1839 │ │ │ else: │
│ 1840 │ │ │ │ outputs = self.base_model.generate(**kwargs) │
│ 1841 │ │ except: │
│ │
│ /Users/itstamen/Documents/workspace/ai/GLM-4/.venv/lib/python3.10/site-packages/torch/utils/_contextlib.py:116 in decorate_context │
│ │
│ 113 │ @functools.wraps(func) │
│ 114 │ def decorate_context(*args, **kwargs): │
│ 115 │ │ with ctx_factory(): │
│ ❱ 116 │ │ │ return func(*args, **kwargs) │
│ 117 │ │
│ 118 │ return decorate_context │
│ 119 │
│ │
│ /Users/itstamen/Documents/workspace/ai/GLM-4/.venv/lib/python3.10/site-packages/transformers/generation/utils.py:2255 in generate │
│ │
│ 2252 │ │ │ ) │
│ 2253 │ │ │ │
│ 2254 │ │ │ # 12. run sample (it degenerates to greedy search when `generation_config.do │
│ ❱ 2255 │ │ │ result = self._sample( │
│ 2256 │ │ │ │ input_ids, │
│ 2257 │ │ │ │ logits_processor=prepared_logits_processor, │
│ 2258 │ │ │ │ stopping_criteria=prepared_stopping_criteria, │
│ │
│ /Users/itstamen/Documents/workspace/ai/GLM-4/.venv/lib/python3.10/site-packages/transformers/generation/utils.py:3254 in _sample │
│ │
│ 3251 │ │ │ model_inputs.update({"output_hidden_states": output_hidden_states} if output │
│ 3252 │ │ │ │
│ 3253 │ │ │ if is_prefill: │
│ ❱ 3254 │ │ │ │ outputs = self(**model_inputs, return_dict=True) │
│ 3255 │ │ │ │ is_prefill = False │
│ 3256 │ │ │ else: │
│ 3257 │ │ │ │ outputs = model_forward(**model_inputs, return_dict=True) │
│ │
│ /Users/itstamen/Documents/workspace/ai/GLM-4/.venv/lib/python3.10/site-packages/torch/nn/modules/module.py:1736 in _wrapped_call_impl │
│ │
│ 1733 │ │ if self._compiled_call_impl is not None: │
│ 1734 │ │ │ return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc] │
│ 1735 │ │ else: │
│ ❱ 1736 │ │ │ return self._call_impl(*args, **kwargs) │
│ 1737 │ │
│ 1738 │ # torchrec tests the code consistency with the following code │
│ 1739 │ # fmt: off │
│ │
│ /Users/itstamen/Documents/workspace/ai/GLM-4/.venv/lib/python3.10/site-packages/torch/nn/modules/module.py:1747 in _call_impl │
│ │
│ 1744 │ │ if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks │
│ 1745 │ │ │ │ or _global_backward_pre_hooks or _global_backward_hooks │
│ 1746 │ │ │ │ or _global_forward_hooks or _global_forward_pre_hooks): │
│ ❱ 1747 │ │ │ return forward_call(*args, **kwargs) │
│ 1748 │ │ │
│ 1749 │ │ result = None │
│ 1750 │ │ called_always_called_hooks = set() │
│ │
│ /Users/itstamen/Documents/workspace/ai/GLM-4/.venv/lib/python3.10/site-packages/accelerate/hooks.py:170 in new_forward │
│ │
│ 167 │ │ │ with torch.no_grad(): │
│ 168 │ │ │ │ output = module._old_forward(*args, **kwargs) │
│ 169 │ │ else: │
│ ❱ 170 │ │ │ output = module._old_forward(*args, **kwargs) │
│ 171 │ │ return module._hf_hook.post_forward(module, output) │
│ 172 │ │
│ 173 │ # Overriding a GraphModuleImpl forward freezes the forward call and later modificati │
│ │
│ /Users/itstamen/.cache/huggingface/modules/transformers_modules/glm-4-9b-chat/modeling_chatglm.py:994 in forward │
│ │
│ 991 │ │ use_cache = use_cache if use_cache is not None else self.config.use_cache │
│ 992 │ │ return_dict = return_dict if return_dict is not None else self.config.use_return │
│ 993 │ │ │
│ ❱ 994 │ │ transformer_outputs = self.transformer( │
│ 995 │ │ │ input_ids=input_ids, │
│ 996 │ │ │ position_ids=position_ids, │
│ 997 │ │ │ attention_mask=attention_mask, │
│ │
│ /Users/itstamen/Documents/workspace/ai/GLM-4/.venv/lib/python3.10/site-packages/torch/nn/modules/module.py:1736 in _wrapped_call_impl │
│ │
│ 1733 │ │ if self._compiled_call_impl is not None: │
│ 1734 │ │ │ return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc] │
│ 1735 │ │ else: │
│ ❱ 1736 │ │ │ return self._call_impl(*args, **kwargs) │
│ 1737 │ │
│ 1738 │ # torchrec tests the code consistency with the following code │
│ 1739 │ # fmt: off │
│ │
│ /Users/itstamen/Documents/workspace/ai/GLM-4/.venv/lib/python3.10/site-packages/torch/nn/modules/module.py:1747 in _call_impl │
│ │
│ 1744 │ │ if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks │
│ 1745 │ │ │ │ or _global_backward_pre_hooks or _global_backward_hooks │
│ 1746 │ │ │ │ or _global_forward_hooks or _global_forward_pre_hooks): │
│ ❱ 1747 │ │ │ return forward_call(*args, **kwargs) │
│ 1748 │ │ │
│ 1749 │ │ result = None │
│ 1750 │ │ called_always_called_hooks = set() │
│ │
│ /Users/itstamen/.cache/huggingface/modules/transformers_modules/glm-4-9b-chat/modeling_chatglm.py:892 in forward │
│ │
│ 889 │ │ │ rotary_pos_emb = rotary_pos_emb[None, :seq_length] │
│ 890 │ │ │
│ 891 │ │ # Run encoder. │
│ ❱ 892 │ │ hidden_states, presents, all_hidden_states, all_self_attentions = self.encoder( │
│ 893 │ │ │ inputs_embeds, full_attention_mask, rotary_pos_emb=rotary_pos_emb, │
│ 894 │ │ │ kv_caches=past_key_values, use_cache=use_cache, output_hidden_states=output │
│ 895 │ │ ) │
│ │
│ /Users/itstamen/Documents/workspace/ai/GLM-4/.venv/lib/python3.10/site-packages/torch/nn/modules/module.py:1736 in _wrapped_call_impl │
│ │
│ 1733 │ │ if self._compiled_call_impl is not None: │
│ 1734 │ │ │ return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc] │
│ 1735 │ │ else: │
│ ❱ 1736 │ │ │ return self._call_impl(*args, **kwargs) │
│ 1737 │ │
│ 1738 │ # torchrec tests the code consistency with the following code │
│ 1739 │ # fmt: off │
│ │
│ /Users/itstamen/Documents/workspace/ai/GLM-4/.venv/lib/python3.10/site-packages/torch/nn/modules/module.py:1747 in _call_impl │
│ │
│ 1744 │ │ if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks │
│ 1745 │ │ │ │ or _global_backward_pre_hooks or _global_backward_hooks │
│ 1746 │ │ │ │ or _global_forward_hooks or _global_forward_pre_hooks): │
│ ❱ 1747 │ │ │ return forward_call(*args, **kwargs) │
│ 1748 │ │ │
│ 1749 │ │ result = None │
│ 1750 │ │ called_always_called_hooks = set() │
│ │
│ /Users/itstamen/.cache/huggingface/modules/transformers_modules/glm-4-9b-chat/modeling_chatglm.py:722 in forward │
│ │
│ 719 │ │ │ │ │ use_reentrant=False │
│ 720 │ │ │ │ ) │
│ 721 │ │ │ else: │
│ ❱ 722 │ │ │ │ layer_ret = layer( │
│ 723 │ │ │ │ │ hidden_states, │
│ 724 │ │ │ │ │ attention_mask, │
│ 725 │ │ │ │ │ rotary_pos_emb, │
│ │
│ /Users/itstamen/Documents/workspace/ai/GLM-4/.venv/lib/python3.10/site-packages/torch/nn/modules/module.py:1736 in _wrapped_call_impl │
│ │
│ 1733 │ │ if self._compiled_call_impl is not None: │
│ 1734 │ │ │ return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc] │
│ 1735 │ │ else: │
│ ❱ 1736 │ │ │ return self._call_impl(*args, **kwargs) │
│ 1737 │ │
│ 1738 │ # torchrec tests the code consistency with the following code │
│ 1739 │ # fmt: off │
│ │
│ /Users/itstamen/Documents/workspace/ai/GLM-4/.venv/lib/python3.10/site-packages/torch/nn/modules/module.py:1747 in _call_impl │
│ │
│ 1744 │ │ if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks │
│ 1745 │ │ │ │ or _global_backward_pre_hooks or _global_backward_hooks │
│ 1746 │ │ │ │ or _global_forward_hooks or _global_forward_pre_hooks): │
│ ❱ 1747 │ │ │ return forward_call(*args, **kwargs) │
│ 1748 │ │ │
│ 1749 │ │ result = None │
│ 1750 │ │ called_always_called_hooks = set() │
│ │
│ /Users/itstamen/Documents/workspace/ai/GLM-4/.venv/lib/python3.10/site-packages/accelerate/hooks.py:170 in new_forward │
│ │
│ 167 │ │ │ with torch.no_grad(): │
│ 168 │ │ │ │ output = module._old_forward(*args, **kwargs) │
│ 169 │ │ else: │
│ ❱ 170 │ │ │ output = module._old_forward(*args, **kwargs) │
│ 171 │ │ return module._hf_hook.post_forward(module, output) │
│ 172 │ │
│ 173 │ # Overriding a GraphModuleImpl forward freezes the forward call and later modificati │
│ │
│ /Users/itstamen/.cache/huggingface/modules/transformers_modules/glm-4-9b-chat/modeling_chatglm.py:623 in forward │
│ │
│ 620 │ │ # hidden_states: [s, b, h] │
│ 621 │ │ │
│ 622 │ │ # Layer norm at the beginning of the transformer layer. │
│ ❱ 623 │ │ layernorm_output = self.input_layernorm(hidden_states) │
│ 624 │ │ # Self attention. │
│ 625 │ │ attention_output, kv_cache = self.self_attention( │
│ 626 │ │ │ layernorm_output, │
│ │
│ /Users/itstamen/Documents/workspace/ai/GLM-4/.venv/lib/python3.10/site-packages/torch/nn/modules/module.py:1736 in _wrapped_call_impl │
│ │
│ 1733 │ │ if self._compiled_call_impl is not None: │
│ 1734 │ │ │ return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc] │
│ 1735 │ │ else: │
│ ❱ 1736 │ │ │ return self._call_impl(*args, **kwargs) │
│ 1737 │ │
│ 1738 │ # torchrec tests the code consistency with the following code │
│ 1739 │ # fmt: off │
│ │
│ /Users/itstamen/Documents/workspace/ai/GLM-4/.venv/lib/python3.10/site-packages/torch/nn/modules/module.py:1747 in _call_impl │
│ │
│ 1744 │ │ if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks │
│ 1745 │ │ │ │ or _global_backward_pre_hooks or _global_backward_hooks │
│ 1746 │ │ │ │ or _global_forward_hooks or _global_forward_pre_hooks): │
│ ❱ 1747 │ │ │ return forward_call(*args, **kwargs) │
│ 1748 │ │ │
│ 1749 │ │ result = None │
│ 1750 │ │ called_always_called_hooks = set() │
│ │
│ /Users/itstamen/Documents/workspace/ai/GLM-4/.venv/lib/python3.10/site-packages/accelerate/hooks.py:165 in new_forward │
│ │
│ 162 │ module._hf_hook = hook │
│ 163 │ │
│ 164 │ def new_forward(module, *args, **kwargs): │
│ ❱ 165 │ │ args, kwargs = module._hf_hook.pre_forward(module, *args, **kwargs) │
│ 166 │ │ if module._hf_hook.no_grad: │
│ 167 │ │ │ with torch.no_grad(): │
│ 168 │ │ │ │ output = module._old_forward(*args, **kwargs) │
│ │
│ /Users/itstamen/Documents/workspace/ai/GLM-4/.venv/lib/python3.10/site-packages/accelerate/hooks.py:336 in pre_forward │
│ │
│ 333 │ │ │ │ remove_non_persistent=True, │
│ 334 │ │ │ ): │
│ 335 │ │ │ │ fp16_statistics = None │
│ ❱ 336 │ │ │ │ value = self.weights_map[name] │
│ 337 │ │ │ │ if "weight" in name and name.replace("weight", "SCB") in self.weights_ma │
│ 338 │ │ │ │ │ if value.dtype == torch.int8: │
│ 339 │ │ │ │ │ │ fp16_statistics = self.weights_map[name.replace("weight", "SCB") │
│ │
│ /Users/itstamen/Documents/workspace/ai/GLM-4/.venv/lib/python3.10/site-packages/accelerate/utils/offload.py:118 in __getitem__ │
│ │
│ 115 │ │ self.prefix = prefix │
│ 116 │ │
│ 117 │ def __getitem__(self, key): │
│ ❱ 118 │ │ return self.dataset[f"{self.prefix}{key}"] │
│ 119 │ │
│ 120 │ def __iter__(self): │
│ 121 │ │ return iter([key for key in self.dataset if key.startswith(self.prefix)]) │
│ │
│ /Users/itstamen/Documents/workspace/ai/GLM-4/.venv/lib/python3.10/site-packages/accelerate/utils/offload.py:165 in __getitem__ │
│ │
│ 162 │ │ # State dict gets priority │
│ 163 │ │ if key in self.state_dict: │
│ 164 │ │ │ return self.state_dict[key] │
│ ❱ 165 │ │ weight_info = self.index[key] │
│ 166 │ │ if weight_info.get("safetensors_file") is not None: │
│ 167 │ │ │ device = "cpu" if self.device is None else self.device │
│ 168 │ │ │ tensor = None │
╰────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
What is causing this error, and what are possible ways to resolve it? Thank you!
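One direction we plan to try, as a minimal sketch based on the PEFT documentation (not yet verified on this machine): load the base model entirely on CPU/MPS so that accelerate never installs disk-offload hooks, attach the LoRA checkpoint, and merge it before generating:

```python
# Sketch of a possible workaround: avoid device_map/disk offload, then merge the LoRA adapter.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base = AutoModelForCausalLM.from_pretrained(
    "THUDM/glm-4-9b-chat",
    trust_remote_code=True,
    torch_dtype=torch.float16,
    low_cpu_mem_usage=True,
    # no device_map="auto" here, so nothing gets offloaded to disk
)
model = PeftModel.from_pretrained(base, "out/checkpoint-30")
model = model.merge_and_unload()  # fold the LoRA weights into the base model
device = "mps" if torch.backends.mps.is_available() else "cpu"
model = model.to(device).eval()

tokenizer = AutoTokenizer.from_pretrained("THUDM/glm-4-9b-chat", trust_remote_code=True)
```

(Whether a 9B model in fp16 actually fits in memory on this MacBook is a separate question.)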
Who can help? / 谁可以帮助到您?
@zRzRzRzRzRzRzR @wwewwt @Sengxian
Information / 问题信息
Reproduction / 复现过程
python finetune.py data/AdvertiseGen/ THUDM/glm-4-9b-chat configs/lora.yaml
After fine-tuning, three checkpoints were produced.
Then run inference with the fine-tuned model:
python inference.py out/checkpoint-30
Expected behavior / 期待表现
We expect the fine-tuned model to load and run inference normally.