from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
import gradio as gr
import os
import torch
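# Install dashscope at runtime in case it is not listed in the Space's requirements;
# it is imported below, but generation in this app is done by the local PEFT model.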
os.system('pip install dashscope')
from http import HTTPStatus
import dashscope
from dashscope import Generation
from dashscope.api_entities.dashscope_response import Role
from typing import List, Optional, Tuple, Dict
from urllib.error import HTTPError
default_system = 'You are a helpful assistant.'
device = "cuda" if torch.cuda.is_available() else "cpu"
print(device)
# Make sure your Hugging Face token is available as an environment variable
hf_token = os.environ.get("token")
if hf_token is not None:
    from huggingface_hub import HfFolder
    HfFolder.save_token(hf_token)
else:
    print("Hugging Face token not found. Make sure the 'token' environment variable is set.")
# Initial setup
tokenizer = AutoTokenizer.from_pretrained("Juliofc/chaterapia_model")
model_base = AutoModelForCausalLM.from_pretrained("google/gemma-2b-it").to(device)
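# The Chaterapia tokenizer presumably adds the chat tags used in the template below,
# so resize the base model's embeddings to its vocabulary size before loading the adapter.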
model_base.resize_token_embeddings(len(tokenizer))
model_with_adapter = PeftModel.from_pretrained(model_base, "Juliofc/chaterapia_model").to(device)
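# Jinja chat template: wraps each turn in <user>/<system>/<input> tags and closes
# assistant turns with </assistant> followed by the EOS token.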
CHAT_TEMPLATE= """{% for message in messages %}
{% if message['role'] == 'user' %}
{{'<user> ' + message['content'].strip() + ' </user>' }}
{% elif message['role'] == 'system' %}
{{'<system>\\n' + message['content'].strip() + '\\n</system>\\n\\n' }}
{% elif message['role'] == 'assistant' %}
{{ message['content'].strip() + ' </assistant>' + eos_token }}
{% elif message['role'] == 'input' %}
{{'<input> ' + message['content'] + ' </input>' }}
{% endif %}
{% endfor %}""" # Make sure to use your own CHAT_TEMPLATE here
tokenizer.chat_template = CHAT_TEMPLATE
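# Global conversation buffer as role/content dicts; generate_response() expects history in this format.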
chat_history = []
# Function that generates a model response for the current conversation
def generate_response(user_input, chat_history):
    # Prepare the prompt by appending the new user turn to the chat history
    chat_history.append({"content": user_input, "role": "user"})
    prompt = tokenizer.apply_chat_template(chat_history, tokenize=False)
    input_tokens = tokenizer(prompt, return_tensors='pt', padding=True, truncation=True, max_length=1024).to(device)
    # Generate the response with sampling (top-k/top-p, temperature 0.7)
    output_tokens = model_with_adapter.generate(**input_tokens, max_length=1024, pad_token_id=tokenizer.eos_token_id, top_k=50, do_sample=True, top_p=0.95, temperature=0.7)
    generated_text = tokenizer.decode(output_tokens[0], skip_special_tokens=True)
    # Keep only the text between the last </user> tag and the final </assistant> tag
    last_us = generated_text.rfind("</user>") + len("</user>")
    last_as = generated_text.rfind("</assistant>")
    generated_text = generated_text[last_us:last_as].strip()
    chat_history.append({"content": generated_text, "role": "assistant"})
    return generated_text, chat_history
def response(user_input, chat_history):
    # gr.ChatInterface passes history as [user, assistant] pairs; convert to role/content dicts
    history = [{"content": msg, "role": role}
               for pair in chat_history
               for role, msg in zip(("user", "assistant"), pair) if msg]
    reply, history = generate_response(user_input, history)
    print(history)
    return reply

# ChatInterface supplies its own textbox and chatbot components, so it only needs the chat function
iface = gr.ChatInterface(fn=response)
iface.launch()