# chaterapia_demo/app.py
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
import gradio as gr
import os
import torch
device = "cuda" if torch.cuda.is_available() else "cpu"
print(device)
# Make sure your Hugging Face token is available as an environment variable.
hf_token = os.environ.get("token")
if hf_token is not None:
    from huggingface_hub import HfFolder
    HfFolder.save_token(hf_token)
else:
    print("Hugging Face token not found. Make sure the 'token' environment variable is set.")
# Initial setup: load the fine-tuned tokenizer, the base model, and the PEFT adapter.
tokenizer = AutoTokenizer.from_pretrained("Juliofc/chaterapia_model")
model_base = AutoModelForCausalLM.from_pretrained("google/gemma-2b-it").to(device)
# The fine-tuned tokenizer carries extra tokens (presumably the chat tags used in the
# template below), so the base model's embeddings must be resized before attaching the adapter.
model_base.resize_token_embeddings(len(tokenizer))
model_with_adapter = PeftModel.from_pretrained(model_base, "Juliofc/chaterapia_model").to(device)
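# Jinja chat template matching the fine-tuning format: user turns are wrapped in
# <user>...</user>, system messages in <system>...</system>, and assistant turns end
# with </assistant> followed by the EOS token. generate_response() below relies on
# these literal tags to slice the assistant's reply out of the decoded output.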
CHAT_TEMPLATE= """{% for message in messages %}
{% if message['role'] == 'user' %}
{{'<user> ' + message['content'].strip() + ' </user>' }}
{% elif message['role'] == 'system' %}
{{'<system>\\n' + message['content'].strip() + '\\n</system>\\n\\n' }}
{% elif message['role'] == 'assistant' %}
{{ message['content'].strip() + ' </assistant>' + eos_token }}
{% elif message['role'] == 'input' %}
{{'<input> ' + message['content'] + ' </input>' }}
{% endif %}
{% endfor %}"""  # Use the same CHAT_TEMPLATE the model was fine-tuned with
tokenizer.chat_template = CHAT_TEMPLATE
# Generate a reply from the model given the accumulated chat history.
def generate_response(user_input, chat_history):
    # Append the new user turn and render the whole history with the chat template.
    chat_history.append({"content": user_input, "role": "user"})
    prompt = tokenizer.apply_chat_template(chat_history, tokenize=False)
    input_tokens = tokenizer(prompt, return_tensors='pt', padding=True, truncation=True, max_length=1024).to(device)
    # Generate the reply with sampled decoding.
    output_tokens = model_with_adapter.generate(**input_tokens, max_length=1024, pad_token_id=tokenizer.eos_token_id, top_k=50, do_sample=True, top_p=0.95, temperature=0.7)
    generated_text = tokenizer.decode(output_tokens[0], skip_special_tokens=True)
    # Keep only the text between the last </user> tag and the final </assistant> tag.
    last_us = generated_text.rfind("</user>") + len("</user>")
    last_as = generated_text.rfind("</assistant>")
    generated_text = generated_text[last_us:last_as].strip()
    chat_history.append({"content": generated_text, "role": "assistant"})
    return generated_text, chat_history
def respond(user_input, history):
    # gr.ChatInterface passes the history as (user, assistant) pairs by default;
    # convert it to the role/content format expected by the chat template.
    chat_history = []
    for user_msg, assistant_msg in history:
        chat_history.append({"content": user_msg, "role": "user"})
        chat_history.append({"content": assistant_msg, "role": "assistant"})
    reply, _ = generate_response(user_input, chat_history)
    return reply

iface = gr.ChatInterface(fn=respond)
iface.launch()
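# Minimal sketch of a direct (non-UI) call, assuming the model and tokenizer loaded above:
#   reply, history = generate_response("Hola, ¿cómo estás?", [])
#   print(reply)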