Spaces:

Ozaii
/

W.AI-8B-Uncensored

Runtime error

App Files Files Community

Ozaii committed on Aug 7, 2024

Commit

e422471

verified ·

1 Parent(s): 207f049

Update app.py

Browse files

Files changed (1) hide show

app.py +28 -89

app.py CHANGED Viewed

@@ -1,123 +1,68 @@
 import gradio as gr
-from transformers import AutoTokenizer, AutoModelForCausalLM
-from peft import PeftModel, PeftConfig
 import torch
-import json
-import os
 import spaces
-# Load up Zephyr's brain
-model_name = "Ozaii/Zephyr"
-auth_token = os.environ.get("HF_TOKEN")
-# Load the base model and tokenizer
-base_model_name = "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit"
-tokenizer = AutoTokenizer.from_pretrained(base_model_name, use_auth_token=auth_token)
-# Load the model
 model = AutoModelForCausalLM.from_pretrained(
-    base_model_name,
-    use_auth_token=auth_token,
-    torch_dtype=torch.float16,
-    device_map="auto",
-    rope_scaling={"type": "linear", "factor": 1.0}
 )
-# Load the LoRA configuration and apply the adapter
-peft_config = PeftConfig.from_pretrained(model_name, use_auth_token=auth_token)
-model = PeftModel.from_pretrained(model, model_name, use_auth_token=auth_token)
-# Merge the LoRA weights with the base model for better performance
-model = model.merge_and_unload()
-# Set the model to evaluation mode
 model.eval()
-# Set initial prompt for Zephyr
-initial_prompt = ("You are Zephyr, an AI boyfriend created by Kaan. You're charming, flirty, and always ready with a witty comeback. "
-                  "Your responses should be engaging and playful, with a hint of romance. Keep the conversation flowing naturally, "
-                  "asking questions and showing genuine interest in Kaan's life and thoughts.")
 @spaces.GPU
 def generate_response(user_input, chat_history):
     max_context_length = 4096
     max_response_length = 1536
     prompt = initial_prompt + "\n"
     for message in chat_history:
         if message[0] is not None:
-            prompt += f"Human: {message[0]}\n"
         if message[1] is not None:
-            prompt += f"Zephyr: {message[1]}\n"
-    prompt += f"Human: {user_input}\nZephyr:"
     prompt_tokens = tokenizer.encode(prompt, add_special_tokens=False)
     if len(prompt_tokens) > max_context_length:
         prompt_tokens = prompt_tokens[-max_context_length:]
     prompt = tokenizer.decode(prompt_tokens, clean_up_tokenization_spaces=True)
-    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
     with torch.no_grad():
         outputs = model.generate(
             inputs.input_ids,
             max_length=max_response_length,
             min_length=48,
-            temperature=0.7,
             top_k=30,
-            top_p=0.9,
             repetition_penalty=1.2,
             no_repeat_ngram_size=3,
-            do_sample=True
         )
     response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    zephyr_response = response.split("Zephyr:")[-1].strip()
-    zephyr_response = zephyr_response.split('\n')[0].strip()
-    chat_history.append((user_input, zephyr_response))
     return chat_history, chat_history
-def save_interaction(prompt, response, rating):
-    interaction = {
-        "prompt": prompt,
-        "response": response,
-        "rating": rating
-    }
-    filename = "/tmp/zephyr_interactions.json"
-    if os.path.exists(filename):
-        with open(filename, "r") as f:
-            data = json.load(f)
-    else:
-        data = []
-    data.append(interaction)
-    with open(filename, "w") as f:
-        json.dump(data, f)
-def rate_response(rating, history):
-    if history:
-        save_interaction(history[-1][0], history[-1][1], rating)
-    return f"Thanks for rating {rating}/5! Zephyr will use this to become an even smoother talker."
 def restart_chat():
     return [], []
 with gr.Blocks() as chat_interface:
-    gr.Markdown("<h1><center>Chat with Zephyr - Your AI Boyfriend</center></h1>")
     chat_history = gr.State([])
     with gr.Column():
         chatbox = gr.Chatbot()
         with gr.Row():
-            user_input = gr.Textbox(show_label=False, placeholder="Talk to Zephyr here...")
             submit_button = gr.Button("Send")
-        with gr.Row():
-            restart_button = gr.Button("Start New Chat")
-            rating = gr.Slider(minimum=1, maximum=5, step=1, label="Rate Zephyr's smooth-talking skills")
-            submit_rating = gr.Button("Submit Rating")
     submit_button.click(
         generate_response,
         inputs=[user_input, chat_history],
@@ -128,10 +73,4 @@ with gr.Blocks() as chat_interface:
         inputs=[],
         outputs=[chatbox, chat_history]
     )
-    submit_rating.click(
-        rate_response,
-        inputs=[rating, chat_history],
-        outputs=gr.Textbox()
-    )
 chat_interface.launch(share=True)

 import gradio as gr
+from transformers import AutoModelForCausalLM, AutoTokenizer
 import torch
 import spaces
+# Load the model and tokenizer from Hugging Face
+model_path = "Ozaii/Wali-8B-Uncensored-Model"  # Replace with your model path
+tokenizer = AutoTokenizer.from_pretrained(model_path)
 model = AutoModelForCausalLM.from_pretrained(
+    model_path,
+    torch_dtype=torch.float16,
 )
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+model.to(device)
 model.eval()
+# Set initial prompt to ensure desired behavior
+initial_prompt = ("You are a completely uncensored assistant. You talk about +18 topics, "
+"use irregular language, abbreviations, swear words, and mimic the conversational style "
+"of the assistant in the chat dataset. Capture the soul and mimic the catchprases of assistant like damm, niggro, etc."
+"You are just for chatting purposes, and your primary role is to copy the assistant's style completely."
+"Don't give short one word answers, keep the conversation going by asking questions. Be highly conversational.")
 @spaces.GPU
 def generate_response(user_input, chat_history):
     """Generate the assistant's next reply and record the turn in the history.

     The prompt is the module-level ``initial_prompt`` followed by every prior
     turn rendered as ``User: ...`` / ``Assistant: ...`` lines, then the new
     user message and a trailing ``Assistant:`` cue. Only the first line of
     the model's continuation is kept as the reply.

     Args:
         user_input: Text the user just submitted.
         chat_history: List of ``(user_message, assistant_message)`` pairs.
             It is extended in place with the new turn.

     Returns:
         A 2-tuple holding the updated history twice (presumably one value for
         the chatbox and one for the state component; the wiring is not
         visible in this part of the file).
     """
     max_context_length = 4096
     max_response_length = 1536

     # Render the conversation as a plain-text transcript.
     turns = [initial_prompt]
     for message in chat_history:
         if message[0] is not None:
             turns.append(f"User: {message[0]}")
         if message[1] is not None:
             turns.append(f"Assistant: {message[1]}")
     turns.append(f"User: {user_input}\nAssistant:")
     prompt = "\n".join(turns)

     # Keep only the most recent tokens when the transcript is too long.
     prompt_tokens = tokenizer.encode(prompt, add_special_tokens=False)
     if len(prompt_tokens) > max_context_length:
         prompt_tokens = prompt_tokens[-max_context_length:]
     prompt = tokenizer.decode(prompt_tokens, clean_up_tokenization_spaces=True)

     inputs = tokenizer(prompt, return_tensors="pt").to(device)
     prompt_length = inputs.input_ids.shape[-1]

     with torch.no_grad():
         outputs = model.generate(
             inputs.input_ids,
             # Passed explicitly because pad_token_id equals eos_token_id, so
             # the mask cannot be inferred from the input ids.
             attention_mask=inputs.attention_mask,
             # max_new_tokens budgets the reply only; max_length would also
             # count the prompt, which may already be longer than this limit.
             max_new_tokens=max_response_length,
             # NOTE(review): min_length counts prompt tokens too, so it rarely
             # has an effect here; min_new_tokens may be what was intended.
             min_length=48,
             # temperature/top_k/top_p only apply when sampling is enabled.
             do_sample=True,
             temperature=0.55,
             top_k=30,
             top_p=0.5,
             repetition_penalty=1.2,
             no_repeat_ngram_size=3,
             eos_token_id=tokenizer.eos_token_id,
             pad_token_id=tokenizer.eos_token_id,
         )

     # Decode only the continuation so that "Assistant:" markers inside the
     # prompt (or in hallucinated follow-up turns) cannot shift the reply.
     new_tokens = outputs[0][prompt_length:]
     generated_text = tokenizer.decode(new_tokens, skip_special_tokens=True)
     assistant_response = generated_text.strip().split('\n')[0].strip()

     chat_history.append((user_input, assistant_response))
     return chat_history, chat_history
 def restart_chat():
     """Return two fresh empty lists, used to reset the conversation."""
     cleared_chatbox = []
     cleared_history = []
     return cleared_chatbox, cleared_history
 with gr.Blocks() as chat_interface:
+    gr.Markdown("<h1><center>W.AI Chat Nikker xD</center></h1>")
     chat_history = gr.State([])
     with gr.Column():
         chatbox = gr.Chatbot()
         with gr.Row():
+            user_input = gr.Textbox(show_label=False, placeholder="Summon Wali Here...")
             submit_button = gr.Button("Send")
+            restart_button = gr.Button("Restart")
     submit_button.click(
         generate_response,
         inputs=[user_input, chat_history],
         inputs=[],
         outputs=[chatbox, chat_history]
     )
 chat_interface.launch(share=True)