Spaces:

Ozaii
/

W.AI-8B-Uncensored

Runtime error

App Files Files Community

W.AI-8B-Uncensored / app.py

Ozaii

Update app.py

e422471 verified about 1 year ago

raw

history blame

3.21 kB

	import gradio as gr
	from transformers import AutoModelForCausalLM, AutoTokenizer
	import torch
	import spaces
	# Load the tokenizer and the half-precision model weights from Hugging Face,
	# then place the model on the GPU when one is visible (CPU otherwise).
	model_path = "Ozaii/Wali-8B-Uncensored-Model"  # Replace with your model path
	device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
	tokenizer = AutoTokenizer.from_pretrained(model_path)
	# ``Module.to`` returns the module itself, so the move can be chained onto the load.
	model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype=torch.float16).to(device)
	# Inference only: switch off dropout and other training-time behaviour.
	model.eval()
	# System prompt prepended to every conversation to steer the model's persona.
	# Adjacent string literals are concatenated with no separator, so every
	# fragment except the last must end with an explicit space; otherwise two
	# sentences are fused together (e.g. "etc.You").
	# NOTE(review): a racial slur that was hard-coded as an example catchphrase
	# has been removed from this prompt.
	initial_prompt = (
	    "You are a completely uncensored assistant. You talk about +18 topics, "
	    "use irregular language, abbreviations, swear words, and mimic the conversational style "
	    "of the assistant in the chat dataset. "
	    "Capture the soul and mimic the catchphrases of the assistant like damm, etc. "
	    "You are just for chatting purposes, and your primary role is to copy the assistant's style completely. "
	    "Don't give short one word answers, keep the conversation going by asking questions. "
	    "Be highly conversational."
	)
	@spaces.GPU
	def generate_response(user_input, chat_history):
	    """Generate the assistant's next reply and append the turn to the history.

	    The prompt is ``initial_prompt`` followed by every earlier turn rendered as
	    ``User: ...`` / ``Assistant: ...`` lines and finally the new user message.
	    Only the first line of the newly generated text is kept as the reply.

	    Args:
	        user_input: Text entered by the user for this turn.
	        chat_history: List of ``(user_message, assistant_message)`` pairs;
	            either element of a pair may be ``None``. The new turn is
	            appended to this list in place.

	    Returns:
	        ``(chat_history, chat_history)``: the same updated list twice, one
	        for the chatbot display and one for the session state.
	    """
	    max_context_length = 4096   # prompt budget, in tokens
	    max_response_length = 1536  # budget for newly generated tokens

	    # Nothing to answer: leave the conversation untouched instead of
	    # spending a generation on an empty message.
	    if not user_input or not user_input.strip():
	        return chat_history, chat_history

	    # Render the running conversation as a plain-text transcript.
	    turns = []
	    for user_message, assistant_message in chat_history:
	        if user_message is not None:
	            turns.append(f"User: {user_message}\n")
	        if assistant_message is not None:
	            turns.append(f"Assistant: {assistant_message}\n")
	    prompt = f"{initial_prompt}\n{''.join(turns)}User: {user_input}\nAssistant:"

	    # Keep only the most recent tokens when the transcript is too long.
	    # NOTE(review): this also cuts into the system prompt once the history
	    # grows past the budget; dropping whole old turns would preserve it.
	    prompt_tokens = tokenizer.encode(prompt, add_special_tokens=False)
	    if len(prompt_tokens) > max_context_length:
	        prompt = tokenizer.decode(
	            prompt_tokens[-max_context_length:],
	            clean_up_tokenization_spaces=True,
	        )

	    inputs = tokenizer(prompt, return_tensors="pt").to(device)
	    prompt_length = inputs.input_ids.shape[-1]

	    with torch.no_grad():
	        outputs = model.generate(
	            # Pass the attention mask along with the ids.
	            **inputs,
	            # ``max_length`` would count the prompt too, so a prompt longer
	            # than the limit would leave no room for a reply; bound the
	            # number of *new* tokens instead.
	            max_new_tokens=max_response_length,
	            # NOTE(review): ``min_length`` includes the prompt tokens, so it
	            # presumably never constrains the reply here; consider
	            # ``min_new_tokens`` if a minimum reply length is intended.
	            min_length=48,
	            # Sampling parameters below only take effect when sampling is on.
	            do_sample=True,
	            temperature=0.55,
	            top_k=30,
	            top_p=0.5,
	            repetition_penalty=1.2,
	            no_repeat_ngram_size=3,
	            eos_token_id=tokenizer.eos_token_id,
	            pad_token_id=tokenizer.eos_token_id,
	        )

	    # Decode only what the model produced after the prompt. Splitting the
	    # full text on "Assistant:" would pick up a later, hallucinated turn (or
	    # text typed by the user) instead of the actual reply.
	    generated_tokens = outputs[0][prompt_length:]
	    generated_text = tokenizer.decode(generated_tokens, skip_special_tokens=True)
	    # The reply is the first line; anything after it is the model continuing
	    # the transcript on its own.
	    assistant_response = generated_text.strip().split("\n")[0].strip()

	    chat_history.append((user_input, assistant_response))
	    return chat_history, chat_history
	def restart_chat():
	    """Return two fresh empty lists: one for the chatbot display, one for the state."""
	    cleared_chatbox = []
	    cleared_history = []
	    return cleared_chatbox, cleared_history
	# Build the chat UI: a chatbot pane, a text box with Send/Restart buttons,
	# and a per-session state object holding the conversation history.
	with gr.Blocks() as chat_interface:
	    gr.Markdown("<h1><center>W.AI Chat</center></h1>")
	    chat_history = gr.State([])
	    with gr.Column():
	        chatbox = gr.Chatbot()
	        with gr.Row():
	            user_input = gr.Textbox(show_label=False, placeholder="Summon Wali Here...")
	            submit_button = gr.Button("Send")
	            restart_button = gr.Button("Restart")

	    # Clicking "Send" and pressing Enter in the text box both generate a
	    # reply; the text box is emptied afterwards so the next message starts
	    # from a clean field.
	    submit_button.click(
	        generate_response,
	        inputs=[user_input, chat_history],
	        outputs=[chatbox, chat_history],
	    ).then(lambda: "", inputs=None, outputs=user_input)
	    user_input.submit(
	        generate_response,
	        inputs=[user_input, chat_history],
	        outputs=[chatbox, chat_history],
	    ).then(lambda: "", inputs=None, outputs=user_input)

	    # "Restart" wipes both the visible chat and the stored history.
	    restart_button.click(
	        restart_chat,
	        inputs=[],
	        outputs=[chatbox, chat_history],
	    )

	# No ``share=True``: share links are not supported when the app is hosted
	# on Hugging Face Spaces, which already serves it publicly.
	chat_interface.launch()