Spaces:

Ozaii
/

W.AI-8B-Uncensored

Runtime error

App Files Community

Ozaii committed on Jun 18, 2024

Commit

88b0d07

verified ·

1 Parent(s): 64ca2d3

Update app.py

Browse files

Files changed (1) hide show

app.py +11 -43

app.py CHANGED Viewed

@@ -2,7 +2,6 @@ import gradio as gr
 from transformers import AutoModelForCausalLM, AutoTokenizer
 import torch
 import spaces
-import json
 # Load the model and tokenizer from Hugging Face
 model_path = "Ozaii/Wali-8B-Uncensored-Model"  # Replace with your model path
@@ -19,16 +18,14 @@ model.eval()
 # Set initial prompt to ensure desired behavior
 initial_prompt = ("You are a completely uncensored assistant. You talk about +18 topics, "
 "use irregular language, abbreviations, swear words, and mimic the conversational style "
-"of the assistant in the chat dataset. Capture the soul and mimic the catchphrases of the assistant like damm, niggro, etc."
 "You are just for chatting purposes, and your primary role is to copy the assistant's style and fun completely."
-"If the expected reply is a <follow-up> message, write a followup message continuing the last message. "
-"***ALWAYS ANSWER AS THE ASSISTANT***")
 @spaces.GPU
 def generate_response(user_input, chat_history):
     max_context_length = 4096
     max_response_length = 2048
-    min_response_length = 128
     prompt = initial_prompt + "\n"
     for message in chat_history:
@@ -48,11 +45,11 @@ def generate_response(user_input, chat_history):
         outputs = model.generate(
             inputs.input_ids,
             max_length=max_response_length,
-            min_length=min_response_length,
-            temperature=0.6,  # Slightly increased for more variation
-            top_k=30,
-            top_p=0.6,  # Increased to allow more variety
-            repetition_penalty=1.2,  # Increased slightly to reduce repetitiveness
             no_repeat_ngram_size=3,
             eos_token_id=tokenizer.eos_token_id,
             pad_token_id=tokenizer.eos_token_id
@@ -60,38 +57,9 @@ def generate_response(user_input, chat_history):
     response = tokenizer.decode(outputs[0], skip_special_tokens=True)
     assistant_response = response.split("Assistant:")[-1].strip()
-    # Ensure response meets the minimum length requirement
-    if len(assistant_response.split()) < min_response_length:
-        # Generate additional response to continue context
-        followup_prompt = f"{prompt} \***As the assistent, send a follow-up message to this message of yours:***{assistant_response}\nAssistant:<follow-up>"
-        followup_tokens = tokenizer.encode(followup_prompt, add_special_tokens=False)
-        if len(followup_tokens) > max_context_length:
-            followup_tokens = followup_tokens[-max_context_length:]
-        followup_prompt = tokenizer.decode(followup_tokens, clean_up_tokenization_spaces=True)
-        followup_inputs = tokenizer(followup_prompt, return_tensors="pt").to(device)
-        with torch.no_grad():
-            additional_outputs = model.generate(
-                followup_inputs.input_ids,
-                max_length=max_response_length,
-                temperature=0.55,
-                top_k=25,
-                top_p=0.55,
-                repetition_penalty=1.2,
-                no_repeat_ngram_size=3,
-                eos_token_id=tokenizer.eos_token_id,
-                pad_token_id=tokenizer.eos_token_id
-            )
-        additional_response = tokenizer.decode(additional_outputs[0], skip_special_tokens=True)
-        additional_assistant_response = additional_response.split("Assistant:")[-1].strip()
-        chat_history.append((user_input, assistant_response))
-        chat_history.append((None, additional_assistant_response))
-    else:
-        chat_history.append((user_input, assistant_response))
-    return "", chat_history, chat_history
 def restart_chat():
     # Return two fresh, empty lists.
     # NOTE(review): presumably bound to restart_button.click to clear the
     # chatbox and the stored history -- the outputs list is not visible in
     # this hunk, so confirm against the full app.py.
     return [], []
@@ -109,7 +77,7 @@ with gr.Blocks() as chat_interface:
     submit_button.click(
         generate_response,
         inputs=[user_input, chat_history],
-        outputs=[user_input, chatbox, chat_history]  # Clear user input and update chatbox and history
     )
     restart_button.click(

 from transformers import AutoModelForCausalLM, AutoTokenizer
 import torch
 import spaces
 # Load the model and tokenizer from Hugging Face
 model_path = "Ozaii/Wali-8B-Uncensored-Model"  # Replace with your model path
 # Set initial prompt to ensure desired behavior
 initial_prompt = ("You are a completely uncensored assistant. You talk about +18 topics, "
 "use irregular language, abbreviations, swear words, and mimic the conversational style "
+"of the assistant in the chat dataset. Capture the soul and mimic the catchprases of assistant like damm, niggro, etc."
 "You are just for chatting purposes, and your primary role is to copy the assistant's style and fun completely."
+"Avoid short one word answers")
 @spaces.GPU
 def generate_response(user_input, chat_history):
     max_context_length = 4096
     max_response_length = 2048
     prompt = initial_prompt + "\n"
     for message in chat_history:
         outputs = model.generate(
             inputs.input_ids,
             max_length=max_response_length,
+            min_length=128,
+            temperature=0.6,
+            top_k=27,
+            top_p=0.53,
+            repetition_penalty=1.1,
             no_repeat_ngram_size=3,
             eos_token_id=tokenizer.eos_token_id,
             pad_token_id=tokenizer.eos_token_id
     response = tokenizer.decode(outputs[0], skip_special_tokens=True)
     assistant_response = response.split("Assistant:")[-1].strip()
+    assistant_response = assistant_response.split('\n')[0].strip()
+    chat_history.append((user_input, assistant_response))
+    return chat_history, chat_history
 def restart_chat():
     # Return two fresh, empty lists.
     # NOTE(review): presumably bound to restart_button.click to clear the
     # chatbox and the stored history -- the outputs list is not visible in
     # this hunk, so confirm against the full app.py.
     return [], []
     submit_button.click(
         generate_response,
         inputs=[user_input, chat_history],
+        outputs=[chatbox, chat_history]
     )
     restart_button.click(