# Select GPU 2. Must happen before torch/transformers create a CUDA context,
# so this assignment stays at the very top of the file.
import os

os.environ["CUDA_VISIBLE_DEVICES"] = "2"

import torch
from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM, AutoTokenizer, LlamaForCausalLM, LlamaTokenizer

# LoRA SFT checkpoint directory; adjust per training run.
output_dir = "./output/0308_debug_format_for_opensource/checkpoint-561"
peft_model_path = os.path.join(output_dir, "sft_lora_model")  # change checkpoint path

peftconfig = PeftConfig.from_pretrained(peft_model_path)

# Base LLaMA weights; device_map="auto" places layers on the visible GPU(s).
model_base = LlamaForCausalLM.from_pretrained(
    peftconfig.base_model_name_or_path,
    device_map="auto",
)

tokenizer = LlamaTokenizer.from_pretrained(
    peftconfig.base_model_name_or_path,
    add_bos_token=True,
    add_eos_token=False,  # always False for inference
)

# Attach the LoRA adapter on top of the frozen base model.
new_model = PeftModel.from_pretrained(model_base, peft_model_path)
print("Peft model loaded")
def generate_response(prompt, model, task_type=0, max_new_tokens=20):
    """Greedy-decode a continuation of ``prompt`` and return only the new text.

    Args:
        prompt: Input text to condition generation on.
        model: A (PEFT-wrapped) causal LM exposing ``.generate`` and ``.device``.
        task_type: Integer task id forwarded to the model as a scalar
            ``task_types`` tensor (LoRAMoE-style routing). Default 0 preserves
            the original hard-coded behavior.
        max_new_tokens: Upper bound on generated tokens. Default 20 matches
            the original hard-coded value.

    Returns:
        The decoded continuation, with the prompt tokens and special tokens
        stripped.
    """
    encoded_input = tokenizer(prompt, return_tensors="pt", add_special_tokens=True)
    # The model apparently expects a scalar "task_types" tensor alongside the
    # usual inputs — NOTE(review): confirm against the LoRAMoE forward signature.
    encoded_input["task_types"] = torch.tensor(data=task_type)
    # Follow the model's actual placement instead of hard-coding 'cuda':
    # with device_map="auto" the embedding layer may live on any device,
    # and this also keeps CPU-only runs working.
    model_inputs = encoded_input.to(model.device)
    generated_ids = model.generate(
        **model_inputs,
        max_new_tokens=max_new_tokens,
        min_new_tokens=1,
        do_sample=False,  # greedy decoding: deterministic output
        pad_token_id=tokenizer.eos_token_id,
    )
    # Strip the prompt by token position rather than str.replace(): the decoded
    # string need not contain the prompt verbatim (BOS token, whitespace
    # normalization), which made the old replace() silently return the whole
    # decoded sequence.
    prompt_len = model_inputs["input_ids"].shape[1]
    continuation_ids = generated_ids[0][prompt_len:]
    return tokenizer.decode(continuation_ids, skip_special_tokens=True)
# Demo: translate one Spanish line to English (greedy decode, 20-token cap).
prompt = """Spanish: Período de validez después de abierto el envase: 10 horas.
English:"""
generate_response(prompt, new_model)
{'input_ids': tensor([[ 1, 10432, 29901, 2431, 29983, 8144, 316, 659, 680, 29920,
11006, 316, 633, 25449, 560, 8829, 559, 29901, 29871, 29896,
29900, 4029, 294, 29889, 13, 24636, 29901]], device='cuda:0'), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1]], device='cuda:0'), 'task_types': tensor(0, device='cuda:0')}
Output exceeds the [size limit](command:workbench.action.openSettings?[). Open the full output data [in a text editor](command:workbench.action.openLargeOutput?4edebf64-8b8d-479e-88d4-531631ec5757)
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
Cell In[4], line 22
17 return decoded_output[0].replace(prompt, "")
19 prompt = """Spanish: Período de validez después de abierto el envase: 10 horas.
20 English:"""
---> 22 generate_response(prompt, new_model)
Cell In[4], line 9, in generate_response(prompt, model)
7 model_inputs = encoded_input.to('cuda')
8 print(model_inputs)
----> 9 generated_ids = model.generate(**model_inputs,
10 max_new_tokens=20,
11 min_new_tokens=1,
12 do_sample=False,
13 pad_token_id=tokenizer.eos_token_id)
15 decoded_output = tokenizer.batch_decode(generated_ids)
17 return decoded_output[0].replace(prompt, "")
File ~/LoRAMoE/peft/peft_model.py:587, in PeftModelForCausalLM.generate(self, **kwargs)
585 self.base_model.prepare_inputs_for_generation = self.prepare_inputs_for_generation
586 try:
--> 587 outputs = self.base_model.generate(**kwargs)
588 except:
589 self.base_model.prepare_inputs_for_generation = self.base_model_prepare_inputs_for_generation
...
-> 2349 next_token_logits = outputs.logits[:, -1, :]
2351 # pre-process distribution
2352 next_tokens_scores = logits_processor(input_ids, next_token_logits)
AttributeError: 'str' object has no attribute 'logits'