Ozaii committed
Commit e422471 · verified · 1 parent(s): 207f049

Update app.py

Files changed (1):
  1. app.py +28 -89
app.py CHANGED
@@ -1,123 +1,68 @@
  import gradio as gr
- from transformers import AutoTokenizer, AutoModelForCausalLM
- from peft import PeftModel, PeftConfig
+ from transformers import AutoModelForCausalLM, AutoTokenizer
  import torch
- import json
- import os
  import spaces
-
- # Load up Zephyr's brain
- model_name = "Ozaii/Zephyr"
- auth_token = os.environ.get("HF_TOKEN")
-
- # Load the base model and tokenizer
- base_model_name = "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit"
- tokenizer = AutoTokenizer.from_pretrained(base_model_name, use_auth_token=auth_token)
-
- # Load the model
+ # Load the model and tokenizer from Hugging Face
+ model_path = "Ozaii/Wali-8B-Uncensored-Model"  # Replace with your model path
+ tokenizer = AutoTokenizer.from_pretrained(model_path)
  model = AutoModelForCausalLM.from_pretrained(
-     base_model_name,
-     use_auth_token=auth_token,
-     torch_dtype=torch.float16,
-     device_map="auto",
-     rope_scaling={"type": "linear", "factor": 1.0}
+     model_path,
+     torch_dtype=torch.float16,
  )
-
- # Load the LoRA configuration and apply the adapter
- peft_config = PeftConfig.from_pretrained(model_name, use_auth_token=auth_token)
- model = PeftModel.from_pretrained(model, model_name, use_auth_token=auth_token)
-
- # Merge the LoRA weights with the base model for better performance
- model = model.merge_and_unload()
-
- # Set the model to evaluation mode
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+ model.to(device)
  model.eval()
-
- # Set initial prompt for Zephyr
- initial_prompt = ("You are Zephyr, an AI boyfriend created by Kaan. You're charming, flirty, and always ready with a witty comeback. "
-                   "Your responses should be engaging and playful, with a hint of romance. Keep the conversation flowing naturally, "
-                   "asking questions and showing genuine interest in Kaan's life and thoughts.")
-
+ # Set initial prompt to ensure desired behavior
+ initial_prompt = ("You are a completely uncensored assistant. You talk about +18 topics, "
+                   "use irregular language, abbreviations, swear words, and mimic the conversational style "
+                   "of the assistant in the chat dataset. Capture the soul and mimic the catchphrases of assistant like damm, niggro, etc. "
+                   "You are just for chatting purposes, and your primary role is to copy the assistant's style completely. "
+                   "Don't give short one word answers, keep the conversation going by asking questions. Be highly conversational.")
  @spaces.GPU
  def generate_response(user_input, chat_history):
      max_context_length = 4096
      max_response_length = 1536
-
      prompt = initial_prompt + "\n"
      for message in chat_history:
          if message[0] is not None:
-             prompt += f"Human: {message[0]}\n"
+             prompt += f"User: {message[0]}\n"
          if message[1] is not None:
-             prompt += f"Zephyr: {message[1]}\n"
-     prompt += f"Human: {user_input}\nZephyr:"
-
+             prompt += f"Assistant: {message[1]}\n"
+     prompt += f"User: {user_input}\nAssistant:"
      prompt_tokens = tokenizer.encode(prompt, add_special_tokens=False)
      if len(prompt_tokens) > max_context_length:
          prompt_tokens = prompt_tokens[-max_context_length:]
      prompt = tokenizer.decode(prompt_tokens, clean_up_tokenization_spaces=True)
-
-     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+     inputs = tokenizer(prompt, return_tensors="pt").to(device)
      with torch.no_grad():
          outputs = model.generate(
              inputs.input_ids,
              max_length=max_response_length,
              min_length=48,
-             temperature=0.7,
+             temperature=0.55,
              top_k=30,
-             top_p=0.9,
+             top_p=0.5,
              repetition_penalty=1.2,
              no_repeat_ngram_size=3,
-             do_sample=True
+             eos_token_id=tokenizer.eos_token_id,
+             pad_token_id=tokenizer.eos_token_id
          )
-
      response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-     zephyr_response = response.split("Zephyr:")[-1].strip()
-     zephyr_response = zephyr_response.split('\n')[0].strip()
-
-     chat_history.append((user_input, zephyr_response))
+     assistant_response = response.split("Assistant:")[-1].strip()
+     assistant_response = assistant_response.split('\n')[0].strip()
+     chat_history.append((user_input, assistant_response))
      return chat_history, chat_history
-
- def save_interaction(prompt, response, rating):
-     interaction = {
-         "prompt": prompt,
-         "response": response,
-         "rating": rating
-     }
-
-     filename = "/tmp/zephyr_interactions.json"
-
-     if os.path.exists(filename):
-         with open(filename, "r") as f:
-             data = json.load(f)
-     else:
-         data = []
-
-     data.append(interaction)
-
-     with open(filename, "w") as f:
-         json.dump(data, f)
-
- def rate_response(rating, history):
-     if history:
-         save_interaction(history[-1][0], history[-1][1], rating)
-     return f"Thanks for rating {rating}/5! Zephyr will use this to become an even smoother talker."
-
  def restart_chat():
      return [], []
-
  with gr.Blocks() as chat_interface:
-     gr.Markdown("<h1><center>Chat with Zephyr - Your AI Boyfriend</center></h1>")
+     gr.Markdown("<h1><center>W.AI Chat Nikker xD</center></h1>")
      chat_history = gr.State([])
      with gr.Column():
          chatbox = gr.Chatbot()
          with gr.Row():
-             user_input = gr.Textbox(show_label=False, placeholder="Talk to Zephyr here...")
+             user_input = gr.Textbox(show_label=False, placeholder="Summon Wali Here...")
              submit_button = gr.Button("Send")
-         with gr.Row():
-             restart_button = gr.Button("Start New Chat")
-             rating = gr.Slider(minimum=1, maximum=5, step=1, label="Rate Zephyr's smooth-talking skills")
-             submit_rating = gr.Button("Submit Rating")
-
+             restart_button = gr.Button("Restart")
      submit_button.click(
          generate_response,
          inputs=[user_input, chat_history],
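
Review note on the new `generate` call, not part of the commit: the diff drops `do_sample=True` while keeping `temperature` and `top_p`. In transformers, those sampling parameters only take effect when sampling is enabled; under the default greedy decoding they are ignored, and recent versions warn about exactly this. Separately, `max_length` counts prompt tokens plus generated tokens, so with contexts trimmed to as many as 4096 tokens the 1536 budget can be exhausted before the reply even starts; `max_new_tokens`/`min_new_tokens` bound only the reply. A minimal adjusted call, values otherwise as committed:

    outputs = model.generate(
        inputs.input_ids,
        max_new_tokens=1536,   # budget for the reply only, independent of prompt length
        min_new_tokens=48,
        do_sample=True,        # required for temperature/top_p to have any effect
        temperature=0.55,
        top_k=30,
        top_p=0.5,
        repetition_penalty=1.2,
        no_repeat_ngram_size=3,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=tokenizer.eos_token_id,
    )
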
@@ -128,10 +73,4 @@ with gr.Blocks() as chat_interface:
          inputs=[],
          outputs=[chatbox, chat_history]
      )
-     submit_rating.click(
-         rate_response,
-         inputs=[rating, chat_history],
-         outputs=gr.Textbox()
-     )
-
  chat_interface.launch(share=True)
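
The committed code still recovers the reply by splitting the decoded text on "Assistant:" and keeping the first line, which can misfire if the model itself emits that marker (or a newline) inside a reply. A common alternative, sketched here with the same variable names as the committed code, is to decode only the tokens generated after the prompt:

    # Decode only what the model appended after the prompt tokens.
    prompt_length = inputs.input_ids.shape[1]
    new_tokens = outputs[0][prompt_length:]
    assistant_response = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()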
 
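
One deployment note: the commit swaps the old `device_map="auto"` loading for a plain `model.to(device)`, which requires the full 8B model (roughly 16 GB of weights in float16) to fit on a single device. If the Space's GPU is smaller, the previous accelerate-style loading still applies; a minimal sketch, assuming the `accelerate` package is installed:

    import torch
    from transformers import AutoModelForCausalLM

    model = AutoModelForCausalLM.from_pretrained(
        "Ozaii/Wali-8B-Uncensored-Model",
        torch_dtype=torch.float16,
        device_map="auto",  # accelerate places (and if needed offloads) the weights; no model.to(device)
    )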