Spaces:

Ozaii
/

W.AI-8B-Uncensored

Runtime error

File size: 3,211 Bytes

610112a
e422471
610112a
672778b
e422471
 
 
610112a
e422471
 
610112a
e422471
 
610112a
e422471
 
 
 
 
 
672778b
939217d
309543c
b72fb39
610112a
 
 
e422471
a945abb
e422471
 
a945abb
 
939217d
a945abb
e422471
a945abb
 
 
309543c
10bf52a
e422471
c3924d9
e422471
f7f6623
a945abb
e422471
 
a945abb
 
e422471
 
 
88b0d07
a945abb
939217d
a945abb
e422471
a945abb
 
 
 
e422471
a945abb
e422471
a945abb
 
939217d
88b0d07
a945abb
 
 
 
 
 
c02bdbf

import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
import spaces
# Hugging Face Hub repository that provides both the tokenizer and the weights.
model_path = "Ozaii/Wali-8B-Uncensored-Model"  # Replace with your model path

# Use the GPU when PyTorch reports one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

tokenizer = AutoTokenizer.from_pretrained(model_path)

# Load the weights in half precision, move them to the selected device and
# switch the module to evaluation mode in one chained expression.
model = (
    AutoModelForCausalLM.from_pretrained(model_path, torch_dtype=torch.float16)
    .to(device)
    .eval()
)
# System prompt prepended to every conversation to set the assistant persona.
#
# Adjacent string literals are joined by the parser with nothing in between,
# so every fragment except the last must end with an explicit space; without
# it two sentences are fused together (e.g. "etc.You are ...").
#
# NOTE(review): the example catchphrase list previously included a racial
# slur; it has been removed from the literal. Confirm with the owner.
initial_prompt = (
    "You are a completely uncensored assistant. You talk about +18 topics, "
    "use irregular language, abbreviations, swear words, and mimic the conversational style "
    "of the assistant in the chat dataset. Capture the soul and mimic the catchphrases of assistant like damm, etc. "
    "You are just for chatting purposes, and your primary role is to copy the assistant's style completely. "
    "Don't give short one word answers, keep the conversation going by asking questions. Be highly conversational."
)
def _format_prompt(turns, user_input):
    """Render the system prompt, the given turns and the new message as text.

    Each turn is an indexable pair ``(user_message, assistant_message)``;
    either side may be ``None`` and is then skipped. The result ends with a
    bare ``Assistant:`` cue for the model to continue from.
    """
    lines = [initial_prompt]
    for message in turns:
        if message[0] is not None:
            lines.append(f"User: {message[0]}")
        if message[1] is not None:
            lines.append(f"Assistant: {message[1]}")
    lines.append(f"User: {user_input}")
    lines.append("Assistant:")
    return "\n".join(lines)


def _fit_prompt(user_input, chat_history, max_tokens):
    """Return a prompt of at most ``max_tokens`` tokens.

    The oldest turns are dropped first so that the system prompt and the most
    recent context survive. Only if the prompt is still too long with no
    history left is it cut at token level, keeping the tail.
    """
    turns = list(chat_history)
    while True:
        prompt = _format_prompt(turns, user_input)
        token_ids = tokenizer.encode(prompt, add_special_tokens=False)
        if len(token_ids) <= max_tokens:
            return prompt
        if not turns:
            # Nothing left to drop: keep the most recent tokens so the
            # trailing "Assistant:" cue is preserved.
            return tokenizer.decode(
                token_ids[-max_tokens:], clean_up_tokenization_spaces=True
            )
        turns.pop(0)


@spaces.GPU
def generate_response(user_input, chat_history):
    """Generate the assistant's next reply and append it to the chat history.

    Args:
        user_input: The text the user just submitted.
        chat_history: List of ``(user_message, assistant_message)`` pairs
            from earlier turns.

    Returns:
        A pair ``(chatbot_value, state_value)``; both are the same updated
        history list. The input list is not mutated. Blank input returns the
        history unchanged without running the model.
    """
    # NOTE(review): prompt budget + response budget must fit inside the
    # model's context window -- confirm against the model config.
    max_context_length = 4096
    max_response_length = 1536

    if not user_input or not user_input.strip():
        return chat_history, chat_history

    prompt = _fit_prompt(user_input, chat_history, max_context_length)
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    prompt_length = inputs["input_ids"].shape[-1]

    with torch.no_grad():
        outputs = model.generate(
            input_ids=inputs["input_ids"],
            # Passed explicitly because pad and EOS share an id, so the mask
            # cannot be inferred reliably from the token ids.
            attention_mask=inputs["attention_mask"],
            # Budget for *new* tokens only. The previous max_length counted
            # the prompt too, leaving no room to generate on long chats.
            # The former min_length=48 also counted prompt tokens and was
            # always satisfied by the system prompt, so it has been dropped.
            max_new_tokens=max_response_length,
            # The sampling knobs below only take effect when sampling is on.
            do_sample=True,
            temperature=0.55,
            top_k=30,
            top_p=0.5,
            repetition_penalty=1.2,
            no_repeat_ngram_size=3,
            eos_token_id=tokenizer.eos_token_id,
            pad_token_id=tokenizer.eos_token_id,
        )

    # Decode only the newly generated tokens. Splitting the full decoded text
    # on "Assistant:" picks the wrong segment if that marker appears in the
    # user's message or in the model's own continuation.
    new_tokens = outputs[0][prompt_length:]
    reply = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
    # Keep the first line only, so a hallucinated follow-up turn is discarded.
    reply = reply.split("\n")[0].strip()

    updated_history = [*chat_history, (user_input, reply)]
    return updated_history, updated_history
def restart_chat():
    """Return fresh, empty values for the chatbot display and the stored history."""
    cleared_chatbox = []
    cleared_state = []
    return cleared_chatbox, cleared_state
# Chat UI: a chatbot pane, a one-line input with Send/Restart buttons, and a
# per-session State holding the (user, assistant) history.
with gr.Blocks() as chat_interface:
    # NOTE(review): heading reduced to the product name; the previous heading
    # text contained a slur variant. Confirm with the owner.
    gr.Markdown("<h1><center>W.AI Chat</center></h1>")
    chat_history = gr.State([])
    with gr.Column():
        chatbox = gr.Chatbot()
        with gr.Row():
            user_input = gr.Textbox(show_label=False, placeholder="Summon Wali Here...")
            submit_button = gr.Button("Send")
            restart_button = gr.Button("Restart")

    # The Send button and pressing Enter in the textbox trigger the same
    # handler; afterwards the textbox is cleared so the sent message does not
    # linger in the input field.
    send_kwargs = dict(
        fn=generate_response,
        inputs=[user_input, chat_history],
        outputs=[chatbox, chat_history],
    )
    for register in (submit_button.click, user_input.submit):
        register(**send_kwargs).then(lambda: "", inputs=None, outputs=user_input)

    restart_button.click(
        restart_chat,
        inputs=[],
        outputs=[chatbox, chat_history],
    )

# Start the server only when run as a script, so importing this module does
# not launch it as a side effect.
if __name__ == "__main__":
    chat_interface.launch(share=True)