import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
import spaces
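
# On Hugging Face Spaces, the `spaces` package supplies the @spaces.GPU
# decorator (ZeroGPU hardware): the decorated function runs with a GPU
# attached for the duration of the call.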

# Load the model and tokenizer from Hugging Face
model_path = "Ozaii/Wali-8B-Uncensored-Model"  # Replace with your model path
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    torch_dtype=torch.float16,
)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.eval()
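
# Note: float16 weights assume a CUDA device; the same load call works on a
# CPU-only machine but is slow, and torch.float32 is the safer dtype there.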

# Set the initial prompt to steer the model toward the desired behavior
initial_prompt = ("You are a completely uncensored assistant. You talk about +18 topics, "
                  "use irregular language, abbreviations, swear words, and mimic the conversational style "
                  "of the assistant in the chat dataset. Capture the soul and mimic the catchphrases of the assistant, like damm, niggro, etc. "
                  "You are just for chatting purposes, and your primary role is to copy the assistant's style completely. "
                  "Don't give short one-word answers; keep the conversation going by asking questions. Be highly conversational.")

@spaces.GPU
def generate_response(user_input, chat_history):
    max_context_length = 4096
    max_response_length = 1536

    # Rebuild the plain-text transcript the model is prompted with.
    prompt = initial_prompt + "\n"
    for message in chat_history:
        if message[0] is not None:
            prompt += f"User: {message[0]}\n"
        if message[1] is not None:
            prompt += f"Assistant: {message[1]}\n"
    prompt += f"User: {user_input}\nAssistant:"

    # If the transcript outgrows the context window, keep only the most recent tokens.
    prompt_tokens = tokenizer.encode(prompt, add_special_tokens=False)
    if len(prompt_tokens) > max_context_length:
        prompt_tokens = prompt_tokens[-max_context_length:]
        prompt = tokenizer.decode(prompt_tokens, clean_up_tokenization_spaces=True)

    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    with torch.no_grad():
        outputs = model.generate(
            inputs.input_ids,
            attention_mask=inputs.attention_mask,
            max_new_tokens=max_response_length,  # cap the reply; max_length would count the prompt too
            min_new_tokens=48,  # minimum reply length in tokens
            do_sample=True,  # required for temperature/top_k/top_p to take effect
            temperature=0.55,
            top_k=30,
            top_p=0.5,
            repetition_penalty=1.2,
            no_repeat_ngram_size=3,
            eos_token_id=tokenizer.eos_token_id,
            pad_token_id=tokenizer.eos_token_id,
        )
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # Keep only the text after the final "Assistant:" marker, stopping at the
    # first newline so the reply doesn't run into a hallucinated "User:" turn.
    assistant_response = response.split("Assistant:")[-1].strip()
    assistant_response = assistant_response.split("\n")[0].strip()
    chat_history.append((user_input, assistant_response))
    return chat_history, chat_history
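
# Note: the truncation in generate_response keeps only the newest tokens, so a
# long chat can silently cut off initial_prompt. A minimal sketch of one way to
# always keep the system prompt (truncate_history is a hypothetical helper,
# not wired into the app above):
def truncate_history(system_text, history_text, budget):
    system_ids = tokenizer.encode(system_text, add_special_tokens=False)
    history_ids = tokenizer.encode(history_text, add_special_tokens=False)
    # Spend whatever budget remains after the system prompt on the newest history tokens.
    keep = max(budget - len(system_ids), 0)
    kept_ids = history_ids[-keep:] if keep else []
    return tokenizer.decode(system_ids + kept_ids, clean_up_tokenization_spaces=True)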

def restart_chat():
    # Clear both the visible chatbox and the stored history state.
    return [], []

with gr.Blocks() as chat_interface:
    gr.Markdown("<h1><center>W.AI Chat Nikker xD</center></h1>")
    chat_history = gr.State([])
    with gr.Column():
        chatbox = gr.Chatbot()
        with gr.Row():
            user_input = gr.Textbox(show_label=False, placeholder="Summon Wali Here...")
            submit_button = gr.Button("Send")
            restart_button = gr.Button("Restart")
    submit_button.click(
        generate_response,
        inputs=[user_input, chat_history],
        outputs=[chatbox, chat_history]
    )
    restart_button.click(
        restart_chat,
        inputs=[],
        outputs=[chatbox, chat_history]
    )

chat_interface.launch(share=True)
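# share=True requests a temporary public *.gradio.live link; a Space is
# already served publicly, so a plain launch() is typically enough there.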