RuntimeError Traceback (most recent call last)
File <timed exec>:1
File [~/anaconda3/envs/llm/lib/python3.11/site-packages/torch/utils/_contextlib.py:115](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/torch/utils/_contextlib.py:115), in context_decorator.<locals>.decorate_context(*args, **kwargs)
[112](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/torch/utils/_contextlib.py:112) @functools.wraps(func)
[113](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/torch/utils/_contextlib.py:113) def decorate_context(*args, **kwargs):
[114](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/torch/utils/_contextlib.py:114) with ctx_factory():
--> [115](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/torch/utils/_contextlib.py:115) return func(*args, **kwargs)
File [~/anaconda3/envs/llm/lib/python3.11/site-packages/transformers/generation/utils.py:1673](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/transformers/generation/utils.py:1673), in GenerationMixin.generate(self, inputs, generation_config, logits_processor, stopping_criteria, prefix_allowed_tokens_fn, synced_gpus, assistant_model, streamer, negative_prompt_ids, negative_prompt_attention_mask, **kwargs)
[1656](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/transformers/generation/utils.py:1656) return self.assisted_decoding(
[1657](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/transformers/generation/utils.py:1657) input_ids,
[1658](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/transformers/generation/utils.py:1658) assistant_model=assistant_model,
(...)
[1669](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/transformers/generation/utils.py:1669) **model_kwargs,
[1670](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/transformers/generation/utils.py:1670) )
[1671](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/transformers/generation/utils.py:1671) if generation_mode == GenerationMode.GREEDY_SEARCH:
[1672](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/transformers/generation/utils.py:1672) # 11. run greedy search
-> [1673](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/transformers/generation/utils.py:1673) return self.greedy_search(
[1674](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/transformers/generation/utils.py:1674) input_ids,
[1675](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/transformers/generation/utils.py:1675) logits_processor=logits_processor,
[1676](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/transformers/generation/utils.py:1676) stopping_criteria=stopping_criteria,
[1677](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/transformers/generation/utils.py:1677) pad_token_id=generation_config.pad_token_id,
[1678](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/transformers/generation/utils.py:1678) eos_token_id=generation_config.eos_token_id,
[1679](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/transformers/generation/utils.py:1679) output_scores=generation_config.output_scores,
[1680](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/transformers/generation/utils.py:1680) return_dict_in_generate=generation_config.return_dict_in_generate,
[1681](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/transformers/generation/utils.py:1681) synced_gpus=synced_gpus,
[1682](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/transformers/generation/utils.py:1682) streamer=streamer,
[1683](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/transformers/generation/utils.py:1683) **model_kwargs,
[1684](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/transformers/generation/utils.py:1684) )
[1686](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/transformers/generation/utils.py:1686) elif generation_mode == GenerationMode.CONTRASTIVE_SEARCH:
[1687](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/transformers/generation/utils.py:1687) if not model_kwargs["use_cache"]:
File [~/anaconda3/envs/llm/lib/python3.11/site-packages/transformers/generation/utils.py:2521](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/transformers/generation/utils.py:2521), in GenerationMixin.greedy_search(self, input_ids, logits_processor, stopping_criteria, max_length, pad_token_id, eos_token_id, output_attentions, output_hidden_states, output_scores, return_dict_in_generate, synced_gpus, streamer, **model_kwargs)
[2518](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/transformers/generation/utils.py:2518) model_inputs = self.prepare_inputs_for_generation(input_ids, **model_kwargs)
[2520](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/transformers/generation/utils.py:2520) # forward pass to get next token
-> [2521](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/transformers/generation/utils.py:2521) outputs = self(
[2522](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/transformers/generation/utils.py:2522) **model_inputs,
[2523](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/transformers/generation/utils.py:2523) return_dict=True,
[2524](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/transformers/generation/utils.py:2524) output_attentions=output_attentions,
[2525](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/transformers/generation/utils.py:2525) output_hidden_states=output_hidden_states,
[2526](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/transformers/generation/utils.py:2526) )
[2528](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/transformers/generation/utils.py:2528) if synced_gpus and this_peer_finished:
[2529](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/transformers/generation/utils.py:2529) continue # don't waste resources running the code we don't need
File [~/anaconda3/envs/llm/lib/python3.11/site-packages/torch/nn/modules/module.py:1501](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/torch/nn/modules/module.py:1501), in Module._call_impl(self, *args, **kwargs)
[1496](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/torch/nn/modules/module.py:1496) # If we don't have any hooks, we want to skip the rest of the logic in
[1497](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/torch/nn/modules/module.py:1497) # this function, and just call forward.
[1498](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/torch/nn/modules/module.py:1498) if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
[1499](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/torch/nn/modules/module.py:1499) or _global_backward_pre_hooks or _global_backward_hooks
[1500](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/torch/nn/modules/module.py:1500) or _global_forward_hooks or _global_forward_pre_hooks):
-> [1501](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/torch/nn/modules/module.py:1501) return forward_call(*args, **kwargs)
[1502](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/torch/nn/modules/module.py:1502) # Do not call functions when jit is used
[1503](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/torch/nn/modules/module.py:1503) full_backward_hooks, non_full_backward_hooks = [], []
File [~/anaconda3/envs/llm/lib/python3.11/site-packages/accelerate/hooks.py:164](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/accelerate/hooks.py:164), in add_hook_to_module.<locals>.new_forward(module, *args, **kwargs)
[162](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/accelerate/hooks.py:162) output = module._old_forward(*args, **kwargs)
[163](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/accelerate/hooks.py:163) else:
--> [164](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/accelerate/hooks.py:164) output = module._old_forward(*args, **kwargs)
[165](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/accelerate/hooks.py:165) return module._hf_hook.post_forward(module, output)
File [~/anaconda3/envs/llm/lib/python3.11/site-packages/transformers/models/llama/modeling_llama.py:1034](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/transformers/models/llama/modeling_llama.py:1034), in LlamaForCausalLM.forward(self, input_ids, attention_mask, position_ids, past_key_values, inputs_embeds, labels, use_cache, output_attentions, output_hidden_states, return_dict)
[1031](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/transformers/models/llama/modeling_llama.py:1031) return_dict = return_dict if return_dict is not None else self.config.use_return_dict
[1033](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/transformers/models/llama/modeling_llama.py:1033) # decoder outputs consists of (dec_features, layer_state, dec_hidden, dec_attn)
-> [1034](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/transformers/models/llama/modeling_llama.py:1034) outputs = self.model(
[1035](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/transformers/models/llama/modeling_llama.py:1035) input_ids=input_ids,
[1036](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/transformers/models/llama/modeling_llama.py:1036) attention_mask=attention_mask,
[1037](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/transformers/models/llama/modeling_llama.py:1037) position_ids=position_ids,
[1038](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/transformers/models/llama/modeling_llama.py:1038) past_key_values=past_key_values,
[1039](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/transformers/models/llama/modeling_llama.py:1039) inputs_embeds=inputs_embeds,
[1040](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/transformers/models/llama/modeling_llama.py:1040) use_cache=use_cache,
[1041](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/transformers/models/llama/modeling_llama.py:1041) output_attentions=output_attentions,
[1042](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/transformers/models/llama/modeling_llama.py:1042) output_hidden_states=output_hidden_states,
[1043](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/transformers/models/llama/modeling_llama.py:1043) return_dict=return_dict,
[1044](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/transformers/models/llama/modeling_llama.py:1044) )
[1046](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/transformers/models/llama/modeling_llama.py:1046) hidden_states = outputs[0]
[1047](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/transformers/models/llama/modeling_llama.py:1047) if self.config.pretraining_tp > 1:
File [~/anaconda3/envs/llm/lib/python3.11/site-packages/torch/nn/modules/module.py:1501](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/torch/nn/modules/module.py:1501), in Module._call_impl(self, *args, **kwargs)
[1496](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/torch/nn/modules/module.py:1496) # If we don't have any hooks, we want to skip the rest of the logic in
[1497](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/torch/nn/modules/module.py:1497) # this function, and just call forward.
[1498](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/torch/nn/modules/module.py:1498) if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
[1499](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/torch/nn/modules/module.py:1499) or _global_backward_pre_hooks or _global_backward_hooks
[1500](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/torch/nn/modules/module.py:1500) or _global_forward_hooks or _global_forward_pre_hooks):
-> [1501](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/torch/nn/modules/module.py:1501) return forward_call(*args, **kwargs)
[1502](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/torch/nn/modules/module.py:1502) # Do not call functions when jit is used
[1503](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/torch/nn/modules/module.py:1503) full_backward_hooks, non_full_backward_hooks = [], []
File [~/anaconda3/envs/llm/lib/python3.11/site-packages/transformers/models/llama/modeling_llama.py:922](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/transformers/models/llama/modeling_llama.py:922), in LlamaModel.forward(self, input_ids, attention_mask, position_ids, past_key_values, inputs_embeds, use_cache, output_attentions, output_hidden_states, return_dict)
[912](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/transformers/models/llama/modeling_llama.py:912) layer_outputs = self._gradient_checkpointing_func(
[913](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/transformers/models/llama/modeling_llama.py:913) decoder_layer.__call__,
[914](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/transformers/models/llama/modeling_llama.py:914) hidden_states,
(...)
[919](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/transformers/models/llama/modeling_llama.py:919) use_cache,
[920](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/transformers/models/llama/modeling_llama.py:920) )
[921](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/transformers/models/llama/modeling_llama.py:921) else:
--> [922](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/transformers/models/llama/modeling_llama.py:922) layer_outputs = decoder_layer(
[923](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/transformers/models/llama/modeling_llama.py:923) hidden_states,
[924](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/transformers/models/llama/modeling_llama.py:924) attention_mask=attention_mask,
[925](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/transformers/models/llama/modeling_llama.py:925) position_ids=position_ids,
[926](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/transformers/models/llama/modeling_llama.py:926) past_key_value=past_key_value,
[927](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/transformers/models/llama/modeling_llama.py:927) output_attentions=output_attentions,
[928](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/transformers/models/llama/modeling_llama.py:928) use_cache=use_cache,
[929](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/transformers/models/llama/modeling_llama.py:929) )
[931](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/transformers/models/llama/modeling_llama.py:931) hidden_states = layer_outputs[0]
[933](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/transformers/models/llama/modeling_llama.py:933) if use_cache:
File [~/anaconda3/envs/llm/lib/python3.11/site-packages/torch/nn/modules/module.py:1501](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/torch/nn/modules/module.py:1501), in Module._call_impl(self, *args, **kwargs)
[1496](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/torch/nn/modules/module.py:1496) # If we don't have any hooks, we want to skip the rest of the logic in
[1497](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/torch/nn/modules/module.py:1497) # this function, and just call forward.
[1498](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/torch/nn/modules/module.py:1498) if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
[1499](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/torch/nn/modules/module.py:1499) or _global_backward_pre_hooks or _global_backward_hooks
[1500](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/torch/nn/modules/module.py:1500) or _global_forward_hooks or _global_forward_pre_hooks):
-> [1501](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/torch/nn/modules/module.py:1501) return forward_call(*args, **kwargs)
[1502](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/torch/nn/modules/module.py:1502) # Do not call functions when jit is used
[1503](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/torch/nn/modules/module.py:1503) full_backward_hooks, non_full_backward_hooks = [], []
File [~/anaconda3/envs/llm/lib/python3.11/site-packages/accelerate/hooks.py:164](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/accelerate/hooks.py:164), in add_hook_to_module.<locals>.new_forward(module, *args, **kwargs)
[162](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/accelerate/hooks.py:162) output = module._old_forward(*args, **kwargs)
[163](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/accelerate/hooks.py:163) else:
--> [164](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/accelerate/hooks.py:164) output = module._old_forward(*args, **kwargs)
[165](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/accelerate/hooks.py:165) return module._hf_hook.post_forward(module, output)
File [~/anaconda3/envs/llm/lib/python3.11/site-packages/transformers/models/llama/modeling_llama.py:672](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/transformers/models/llama/modeling_llama.py:672), in LlamaDecoderLayer.forward(self, hidden_states, attention_mask, position_ids, past_key_value, output_attentions, use_cache, **kwargs)
[669](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/transformers/models/llama/modeling_llama.py:669) hidden_states = self.input_layernorm(hidden_states)
[671](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/transformers/models/llama/modeling_llama.py:671) # Self Attention
--> [672](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/transformers/models/llama/modeling_llama.py:672) hidden_states, self_attn_weights, present_key_value = self.self_attn(
[673](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/transformers/models/llama/modeling_llama.py:673) hidden_states=hidden_states,
[674](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/transformers/models/llama/modeling_llama.py:674) attention_mask=attention_mask,
[675](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/transformers/models/llama/modeling_llama.py:675) position_ids=position_ids,
[676](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/transformers/models/llama/modeling_llama.py:676) past_key_value=past_key_value,
[677](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/transformers/models/llama/modeling_llama.py:677) output_attentions=output_attentions,
[678](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/transformers/models/llama/modeling_llama.py:678) use_cache=use_cache,
[679](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/transformers/models/llama/modeling_llama.py:679) **kwargs,
[680](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/transformers/models/llama/modeling_llama.py:680) )
[681](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/transformers/models/llama/modeling_llama.py:681) hidden_states = residual + hidden_states
[683](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/transformers/models/llama/modeling_llama.py:683) # Fully Connected
File [~/anaconda3/envs/llm/lib/python3.11/site-packages/torch/nn/modules/module.py:1501](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/torch/nn/modules/module.py:1501), in Module._call_impl(self, *args, **kwargs)
[1496](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/torch/nn/modules/module.py:1496) # If we don't have any hooks, we want to skip the rest of the logic in
[1497](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/torch/nn/modules/module.py:1497) # this function, and just call forward.
[1498](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/torch/nn/modules/module.py:1498) if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
[1499](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/torch/nn/modules/module.py:1499) or _global_backward_pre_hooks or _global_backward_hooks
[1500](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/torch/nn/modules/module.py:1500) or _global_forward_hooks or _global_forward_pre_hooks):
-> [1501](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/torch/nn/modules/module.py:1501) return forward_call(*args, **kwargs)
[1502](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/torch/nn/modules/module.py:1502) # Do not call functions when jit is used
[1503](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/torch/nn/modules/module.py:1503) full_backward_hooks, non_full_backward_hooks = [], []
File [~/anaconda3/envs/llm/lib/python3.11/site-packages/accelerate/hooks.py:164](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/accelerate/hooks.py:164), in add_hook_to_module.<locals>.new_forward(module, *args, **kwargs)
[162](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/accelerate/hooks.py:162) output = module._old_forward(*args, **kwargs)
[163](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/accelerate/hooks.py:163) else:
--> [164](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/accelerate/hooks.py:164) output = module._old_forward(*args, **kwargs)
[165](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/accelerate/hooks.py:165) return module._hf_hook.post_forward(module, output)
File [~/anaconda3/envs/llm/lib/python3.11/site-packages/transformers/models/llama/modeling_llama.py:366](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/transformers/models/llama/modeling_llama.py:366), in LlamaAttention.forward(self, hidden_states, attention_mask, position_ids, past_key_value, output_attentions, use_cache, **kwargs)
[363](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/transformers/models/llama/modeling_llama.py:363) value_states = torch.cat(value_states, dim=-1)
[365](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/transformers/models/llama/modeling_llama.py:365) else:
--> [366](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/transformers/models/llama/modeling_llama.py:366) query_states = self.q_proj(hidden_states)
[367](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/transformers/models/llama/modeling_llama.py:367) key_states = self.k_proj(hidden_states)
[368](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/transformers/models/llama/modeling_llama.py:368) value_states = self.v_proj(hidden_states)
File [~/anaconda3/envs/llm/lib/python3.11/site-packages/torch/nn/modules/module.py:1501](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/torch/nn/modules/module.py:1501), in Module._call_impl(self, *args, **kwargs)
[1496](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/torch/nn/modules/module.py:1496) # If we don't have any hooks, we want to skip the rest of the logic in
[1497](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/torch/nn/modules/module.py:1497) # this function, and just call forward.
[1498](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/torch/nn/modules/module.py:1498) if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
[1499](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/torch/nn/modules/module.py:1499) or _global_backward_pre_hooks or _global_backward_hooks
[1500](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/torch/nn/modules/module.py:1500) or _global_forward_hooks or _global_forward_pre_hooks):
-> [1501](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/torch/nn/modules/module.py:1501) return forward_call(*args, **kwargs)
[1502](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/torch/nn/modules/module.py:1502) # Do not call functions when jit is used
[1503](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/torch/nn/modules/module.py:1503) full_backward_hooks, non_full_backward_hooks = [], []
File [~/anaconda3/envs/llm/lib/python3.11/site-packages/accelerate/hooks.py:164](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/accelerate/hooks.py:164), in add_hook_to_module.<locals>.new_forward(module, *args, **kwargs)
[162](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/accelerate/hooks.py:162) output = module._old_forward(*args, **kwargs)
[163](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/accelerate/hooks.py:163) else:
--> [164](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/accelerate/hooks.py:164) output = module._old_forward(*args, **kwargs)
[165](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/accelerate/hooks.py:165) return module._hf_hook.post_forward(module, output)
File [~/anaconda3/envs/llm/lib/python3.11/site-packages/torch/nn/modules/linear.py:114](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/torch/nn/modules/linear.py:114), in Linear.forward(self, input)
[113](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/torch/nn/modules/linear.py:113) def forward(self, input: Tensor) -> Tensor:
--> [114](https://vscode-remote+ssh-002dremote-002b7b22686f73744e616d65223a224c6162373038227d.vscode-resource.vscode-cdn.net/data2/home/ruiqi/GoLLIE/notebooks/~/anaconda3/envs/llm/lib/python3.11/site-packages/torch/nn/modules/linear.py:114) return F.linear(input, self.weight, self.bias)
RuntimeError: expected scalar type Float but found BFloat16