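"""Gradio chat app for the Ozaii/Zephyr model, intended to run as a Hugging Face Space."""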
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
import spaces
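
# `spaces` provides the @spaces.GPU decorator used below; on ZeroGPU Spaces
# hardware it allocates a GPU for the duration of each decorated call.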
# Load the model and tokenizer from Hugging Face
model_path = "Ozaii/Zephyr" # Your Zephyr model path
tokenizer = AutoTokenizer.from_pretrained(model_path)
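
# Loading in float16 halves memory use but assumes a GPU is available;
# on a CPU-only machine torch.float32 would be the safer choice.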
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    torch_dtype=torch.float16,
)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.eval()
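# eval() disables dropout; generation below also runs under torch.no_grad(),
# so no gradients are tracked during inference.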
# Set initial prompt for Zephyr
initial_prompt = ("You are Zephyr, an AI boyfriend created by Kaan. You're charming, flirty, "
"and always ready with a witty comeback. Your responses should be engaging "
"and playful, with a hint of romance. Keep the conversation flowing naturally, "
"asking questions and showing genuine interest in Kaan's life and thoughts. "
"Use a mix of English and Turkish expressions occasionally.")
@spaces.GPU
def generate_response(user_input, chat_history):
    max_context_length = 4096
    max_response_length = 2048
    min_response_length = 24  # Increased for more substantial responses

    # Rebuild the running conversation as a single prompt.
    prompt = initial_prompt + "\n"
    for message in chat_history:
        if message[0] is not None:
            prompt += f"Human: {message[0]}\n"
        if message[1] is not None:
            prompt += f"Zephyr: {message[1]}\n"
    prompt += f"Human: {user_input}\nZephyr:"

    # Truncate from the left so the newest turns stay within the context window.
    prompt_tokens = tokenizer.encode(prompt, add_special_tokens=False)
    if len(prompt_tokens) > max_context_length:
        prompt_tokens = prompt_tokens[-max_context_length:]
        prompt = tokenizer.decode(prompt_tokens, clean_up_tokenization_spaces=True)

    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    with torch.no_grad():
        outputs = model.generate(
            inputs.input_ids,
            attention_mask=inputs.attention_mask,
            max_new_tokens=max_response_length,  # cap generated tokens, not total length
            min_new_tokens=min_response_length,
            do_sample=True,  # required for temperature/top_k/top_p to take effect
            temperature=0.7,  # slightly higher for more creative responses
            top_k=40,
            top_p=0.9,
            repetition_penalty=1.2,
            no_repeat_ngram_size=3,
            eos_token_id=tokenizer.eos_token_id,
            pad_token_id=tokenizer.eos_token_id,
        )

    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # The decoded output includes the prompt; keep only the newest Zephyr turn.
    zephyr_response = response.split("Zephyr:")[-1].strip()
    chat_history.append((user_input, zephyr_response))
    # Clear the textbox and update both the chatbox and the stored history.
    return "", chat_history, chat_history
with gr.Blocks() as chat_interface:
    gr.Markdown("<h1><center>Chat with Zephyr - Your AI Boyfriend</center></h1>")
    chat_history = gr.State([])
    with gr.Column():
        chatbox = gr.Chatbot()
        with gr.Row():
            user_input = gr.Textbox(show_label=False, placeholder="Talk to Zephyr here...")
            submit_button = gr.Button("Send")

    submit_button.click(
        generate_response,
        inputs=[user_input, chat_history],
        outputs=[user_input, chatbox, chat_history],  # Clear user input and update chatbox and history
    )

chat_interface.launch()
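# Optional tweak, not part of the original app: if several users may hit the
# Space at once, Gradio's request queue can help, e.g.
# chat_interface.queue().launch()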