from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
import gradio as gr
import os
import torch

os.system('pip install dashscope')
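# Note: dashscope is installed at runtime and imported below, but it is not referenced anywhere else in this script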
from http import HTTPStatus
import dashscope
from dashscope import Generation
from dashscope.api_entities.dashscope_response import Role
from typing import List, Optional, Tuple, Dict
from urllib.error import HTTPError
default_system = 'You are a helpful assistant.'

device = "cuda" if torch.cuda.is_available() else "cpu"
print(device)
# Make sure your Hugging Face token is loaded as an environment variable
hf_token = os.environ.get("token")
if hf_token is not None:
    from huggingface_hub import HfFolder
    HfFolder.save_token(hf_token)
else:
    print("Hugging Face token not found. Make sure the 'token' environment variable is set.")

# Initial setup: load the fine-tuned tokenizer, the base model, and the adapter
tokenizer = AutoTokenizer.from_pretrained("Juliofc/chaterapia_model")
model_base = AutoModelForCausalLM.from_pretrained("google/gemma-2b-it").to(device)
# Resize the embedding matrix so it matches the fine-tuned tokenizer's vocabulary
model_base.resize_token_embeddings(len(tokenizer))
# Attach the PEFT (LoRA) adapter weights on top of the base model
model_with_adapter = PeftModel.from_pretrained(model_base, "Juliofc/chaterapia_model").to(device)

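# Custom chat template: wraps each turn in <user>/<system>/<assistant>/<input> markers to match the fine-tuning format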
CHAT_TEMPLATE= """{% for message in messages %}
    {% if message['role'] == 'user' %}
        {{'<user> ' + message['content'].strip() + ' </user>' }}
    {% elif message['role'] == 'system' %}
        {{'<system>\\n' + message['content'].strip() + '\\n</system>\\n\\n' }}
    {% elif message['role'] == 'assistant' %}
        {{ message['content'].strip() + ' </assistant>' + eos_token }}
    {% elif message['role'] == 'input' %}
        {{'<input> ' + message['content'] + ' </input>' }}
    {% endif %}
{% endfor %}""" # Aseg煤rate de usar tu CHAT_TEMPLATE aqu铆
tokenizer.chat_template = CHAT_TEMPLATE  # register the custom template so apply_chat_template uses it

# Function that generates a model response from the accumulated chat history
def generate_response(user_input, chat_history):
    # Build the prompt by appending the new user turn to the chat history
    chat_history.append({"content": user_input, "role": "user"})
    user_input  = tokenizer.apply_chat_template(chat_history, tokenize=False)
    
    input_tokens = tokenizer(user_input, return_tensors='pt', padding=True, truncation=True, max_length=1024).to(device)
    
    # Generate the response (max_length caps prompt plus new tokens at 1024)
    output_tokens = model_with_adapter.generate(**input_tokens, max_length=1024, pad_token_id=tokenizer.eos_token_id, top_k=50, do_sample=True, top_p=0.95, temperature=0.7)
    generated_text = tokenizer.decode(output_tokens[0], skip_special_tokens=True)

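    # Keep only the span between the last </user> tag and the final </assistant> tag, i.e. the newest assistant reply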
    last_us = generated_text.rfind("</user>") + len("</user>")
    last_as = generated_text.rfind("</assistant>")
    generated_text = generated_text[last_us:last_as].strip() 
    chat_history.append({"content": generated_text, "role": "assistant"})
    return generated_text, chat_history
    
def response(user_input, history):
    # gr.ChatInterface passes the history as [user, assistant] pairs (the default tuples format);
    # convert it to the role/content dict format that generate_response expects
    chat_history = []
    for user_msg, assistant_msg in history:
        chat_history.append({"content": user_msg, "role": "user"})
        if assistant_msg is not None:
            chat_history.append({"content": assistant_msg, "role": "assistant"})
    reply, chat_history = generate_response(user_input, chat_history)
    print(chat_history)
    return reply

iface = gr.ChatInterface(fn=response)

iface.launch()