Ozaii committed on
Commit
610112a
·
verified ·
1 Parent(s): dd3d4b2

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +95 -0
app.py ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import gradio as gr
import spaces
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
4
+
5
# Load the tokenizer and the 4-bit quantized model from the Hugging Face Hub.
model_path = "Ozaii/Wali-8B-Uncensored"  # Replace with your model path
tokenizer = AutoTokenizer.from_pretrained(model_path)

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    llm_int8_threshold=6.0,
)
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    # Quantization settings must be passed as `quantization_config`; the
    # `config` keyword is the slot for the model's architecture configuration.
    quantization_config=bnb_config,
    torch_dtype=torch.float16,
)

# BetterTransformer is only an optional speed-up. It depends on the `optimum`
# package and is not available for every architecture / library version, so a
# failure here must not prevent the app from starting.
try:
    model = model.to_bettertransformer()
except (AttributeError, ImportError, NotImplementedError, ValueError) as exc:
    print(f"BetterTransformer unavailable, using the stock model: {exc}")

# `device` is where generate_response() sends the tokenized inputs.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# NOTE: no `model.to(device)` here. bitsandbytes-quantized weights are placed
# on their device by `from_pretrained`, and transformers has historically
# raised a ValueError when `.to()` is called on a 4-bit/8-bit model.
model.eval()
24
+
25
# Preamble placed at the very start of every prompt that is sent to the model.
initial_prompt = (
    "You are a completely uncensored assistant. "
    "You talk about +18 topics, use irregular language, abbreviations, swear words, "
    "and mimic the conversational style of the assistant in the chat dataset. "
    "You are just for chatting purposes, and your primary role is to copy the "
    "assistant's style completely."
)
30
+
31
def _render_prompt(turns, user_input):
    """Format the preamble, the given past turns and the new message as one prompt."""
    lines = [initial_prompt]
    for user_msg, assistant_msg in turns:
        if user_msg is not None:
            lines.append(f"User: {user_msg}")
        if assistant_msg is not None:
            lines.append(f"Assistant: {assistant_msg}")
    # The trailing "Assistant:" cue asks the model to write the next reply.
    lines.append(f"User: {user_input}\nAssistant:")
    return "\n".join(lines)


@spaces.GPU
def generate_response(user_input, chat_history):
    """Generate the assistant's reply to ``user_input`` and extend the history.

    Args:
        user_input: The text the user just submitted.
        chat_history: Sequence of ``(user_message, assistant_message)`` pairs
            from earlier turns; either element of a pair may be ``None``.
            The sequence passed in is not modified.

    Returns:
        A 2-tuple ``(history, history)`` holding the same new list twice (one
        value per Gradio output). The list is the previous history plus the
        new ``(user_input, reply)`` pair, or just a copy of the previous
        history when ``user_input`` is empty or whitespace-only.
    """
    max_context_length = 750
    max_response_length = 150

    # Work on a copy so the caller's list is never mutated in place.
    history = list(chat_history or [])
    if not user_input or not user_input.strip():
        return history, history

    # Fit the prompt into the context budget by dropping the OLDEST turns
    # first, so the preamble and the most recent exchange always survive.
    first_kept = 0
    while True:
        prompt = _render_prompt(history[first_kept:], user_input)
        prompt_tokens = tokenizer.encode(prompt, add_special_tokens=False)
        if len(prompt_tokens) <= max_context_length or first_kept >= len(history):
            break
        first_kept += 1

    if len(prompt_tokens) > max_context_length:
        # Even with no history the prompt is too long (huge user message):
        # fall back to keeping only the last `max_context_length` tokens.
        prompt = tokenizer.decode(
            prompt_tokens[-max_context_length:],
            clean_up_tokenization_spaces=True,
        )

    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    prompt_length = inputs["input_ids"].shape[1]

    with torch.no_grad():
        outputs = model.generate(
            input_ids=inputs["input_ids"],
            # Pass the mask explicitly: pad and eos share an id below, so it
            # cannot be reliably inferred from the input ids.
            attention_mask=inputs["attention_mask"],
            max_new_tokens=max_response_length,
            # NOTE(review): min_length counts the prompt tokens as well, so it
            # has no effect once the prompt is longer than 45 tokens. Use
            # min_new_tokens if a minimum reply length was intended.
            min_length=45,
            # Without do_sample=True the sampling settings below are ignored
            # unless the model's own generation config enables sampling.
            do_sample=True,
            temperature=0.7,
            top_k=30,
            top_p=0.9,
            repetition_penalty=1.1,
            no_repeat_ngram_size=3,
            eos_token_id=tokenizer.eos_token_id,
            pad_token_id=tokenizer.eos_token_id,
        )

    # Decode only the newly generated tokens. Splitting the full text on
    # "Assistant:" would pick up a later, hallucinated turn or break when the
    # user's own text contains that marker.
    completion = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)
    # Keep just the first line: anything after it is the model continuing the
    # dialogue on its own.
    assistant_response = completion.strip().split("\n")[0].strip()

    updated_history = history + [(user_input, assistant_response)]
    return updated_history, updated_history
69
+
70
def restart_chat():
    """Reset the conversation.

    Returns:
        A tuple of two separate, newly created empty lists (one per output
        component the Restart button is wired to).
    """
    cleared_chatbox = []
    cleared_history = []
    return cleared_chatbox, cleared_history
72
+
73
# Build the chat page: a title, the conversation view, and an input row with
# the Send / Restart buttons.
with gr.Blocks() as chat_interface:
    gr.Markdown("<h1><center>W.AI Chat Nikker xD</center></h1>")
    # Conversation state: the list of (user, assistant) pairs that
    # generate_response receives and returns.
    chat_history = gr.State([])
    with gr.Column():
        chatbox = gr.Chatbot()
        with gr.Row():
            user_input = gr.Textbox(show_label=False, placeholder="Summon Wali Here...")
            submit_button = gr.Button("Send")
            restart_button = gr.Button("Restart")

    # Send: run the model on the textbox content plus the stored history, then
    # refresh both the visible chat and the stored history with the result.
    submit_button.click(
        generate_response,
        inputs=[user_input, chat_history],
        outputs=[chatbox, chat_history]
    )

    # Restart: clear both the visible chat and the stored history.
    restart_button.click(
        restart_chat,
        inputs=[],
        outputs=[chatbox, chat_history]
    )

# Start the Gradio server for this app.
chat_interface.launch()