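"""Gradio chat app for the Ozaii/Zephyr model, intended to run as a Hugging Face Space."""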
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
import spaces
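
# `spaces` provides the @spaces.GPU decorator used below; on ZeroGPU Spaces
# hardware it allocates a GPU for the duration of each decorated call.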
# Load the model and tokenizer from Hugging Face
model_path = "Ozaii/Zephyr" # Your Zephyr model path
tokenizer = AutoTokenizer.from_pretrained(model_path)
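
# Loading in float16 halves memory use but assumes a GPU is available;
# on a CPU-only machine torch.float32 would be the safer choice.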
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    torch_dtype=torch.float16,
)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.eval()
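# eval() disables dropout; generation below also runs under torch.no_grad(),
# so no gradients are tracked during inference.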
# Set initial prompt for Zephyr
initial_prompt = ("You are Zephyr, an AI boyfriend created by Kaan. You're charming, flirty, "
"and always ready with a witty comeback. Your responses should be engaging "
"and playful, with a hint of romance. Keep the conversation flowing naturally, "
"asking questions and showing genuine interest in Kaan's life and thoughts. "
"Use a mix of English and Turkish expressions occasionally.")
@spaces.GPU
def generate_response(user_input, chat_history):
    max_context_length = 4096
    max_response_length = 2048
    min_response_length = 24  # Increased for more substantial responses

    # Rebuild the running conversation as a single prompt.
    prompt = initial_prompt + "\n"
    for message in chat_history:
        if message[0] is not None:
            prompt += f"Human: {message[0]}\n"
        if message[1] is not None:
            prompt += f"Zephyr: {message[1]}\n"
    prompt += f"Human: {user_input}\nZephyr:"

    # Truncate from the left so the newest turns stay within the context window.
    prompt_tokens = tokenizer.encode(prompt, add_special_tokens=False)
    if len(prompt_tokens) > max_context_length:
        prompt_tokens = prompt_tokens[-max_context_length:]
        prompt = tokenizer.decode(prompt_tokens, clean_up_tokenization_spaces=True)

    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    with torch.no_grad():
        outputs = model.generate(
            inputs.input_ids,
            attention_mask=inputs.attention_mask,
            max_new_tokens=max_response_length,  # cap generated tokens, not total length
            min_new_tokens=min_response_length,
            do_sample=True,  # required for temperature/top_k/top_p to take effect
            temperature=0.7,  # slightly higher for more creative responses
            top_k=40,
            top_p=0.9,
            repetition_penalty=1.2,
            no_repeat_ngram_size=3,
            eos_token_id=tokenizer.eos_token_id,
            pad_token_id=tokenizer.eos_token_id,
        )

    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # The decoded output includes the prompt; keep only the newest Zephyr turn.
    zephyr_response = response.split("Zephyr:")[-1].strip()
    chat_history.append((user_input, zephyr_response))
    # Clear the textbox and update both the chatbox and the stored history.
    return "", chat_history, chat_history
with gr.Blocks() as chat_interface:
    gr.Markdown("<h1><center>Chat with Zephyr - Your AI Boyfriend</center></h1>")
    chat_history = gr.State([])
    with gr.Column():
        chatbox = gr.Chatbot()
        with gr.Row():
            user_input = gr.Textbox(show_label=False, placeholder="Talk to Zephyr here...")
            submit_button = gr.Button("Send")

    submit_button.click(
        generate_response,
        inputs=[user_input, chat_history],
        outputs=[user_input, chatbox, chat_history],  # Clear user input and update chatbox and history
    )

chat_interface.launch()
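# Optional tweak, not part of the original app: if several users may hit the
# Space at once, Gradio's request queue can help, e.g.
# chat_interface.queue().launch()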