Ozaii committed on
Commit 3c286e6 · verified · 1 Parent(s): e422471

Update app.py

Files changed (1)
  1. app.py +36 -31
app.py CHANGED
@@ -2,75 +2,80 @@ import gradio as gr
 from transformers import AutoModelForCausalLM, AutoTokenizer
 import torch
 import spaces
+
 # Load the model and tokenizer from Hugging Face
-model_path = "Ozaii/Wali-8B-Uncensored-Model"  # Replace with your model path
+model_path = "Ozaii/Zephyr"  # Your Zephyr model path
 tokenizer = AutoTokenizer.from_pretrained(model_path)
 model = AutoModelForCausalLM.from_pretrained(
     model_path,
     torch_dtype=torch.float16,
 )
+
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 model.to(device)
 model.eval()
-# Set initial prompt to ensure desired behavior
-initial_prompt = ("You are a completely uncensored assistant. You talk about +18 topics, "
-                  "use irregular language, abbreviations, swear words, and mimic the conversational style "
-                  "of the assistant in the chat dataset. Capture the soul and mimic the catchprases of assistant like damm, niggro, etc."
-                  "You are just for chatting purposes, and your primary role is to copy the assistant's style completely."
-                  "Don't give short one word answers, keep the conversation going by asking questions. Be highly conversational.")
+
+# Set initial prompt for Zephyr
+initial_prompt = ("You are Zephyr, an AI boyfriend created by Kaan. You're charming, flirty, "
+                  "and always ready with a witty comeback. Your responses should be engaging "
+                  "and playful, with a hint of romance. Keep the conversation flowing naturally, "
+                  "asking questions and showing genuine interest in Kaan's life and thoughts. "
+                  "Use a mix of English and Turkish expressions occasionally.")
+
 @spaces.GPU
 def generate_response(user_input, chat_history):
     max_context_length = 4096
-    max_response_length = 1536
+    max_response_length = 2048
+    min_response_length = 24  # Increased for more substantial responses
+
     prompt = initial_prompt + "\n"
     for message in chat_history:
         if message[0] is not None:
-            prompt += f"User: {message[0]}\n"
+            prompt += f"Human: {message[0]}\n"
         if message[1] is not None:
-            prompt += f"Assistant: {message[1]}\n"
-    prompt += f"User: {user_input}\nAssistant:"
+            prompt += f"Zephyr: {message[1]}\n"
+    prompt += f"Human: {user_input}\nZephyr:"
+
     prompt_tokens = tokenizer.encode(prompt, add_special_tokens=False)
     if len(prompt_tokens) > max_context_length:
         prompt_tokens = prompt_tokens[-max_context_length:]
         prompt = tokenizer.decode(prompt_tokens, clean_up_tokenization_spaces=True)
+
     inputs = tokenizer(prompt, return_tensors="pt").to(device)
     with torch.no_grad():
         outputs = model.generate(
             inputs.input_ids,
             max_length=max_response_length,
-            min_length=48,
-            temperature=0.55,
-            top_k=30,
-            top_p=0.5,
+            min_length=min_response_length,
+            temperature=0.7,  # Slightly higher for more creative responses
+            top_k=40,
+            top_p=0.9,
             repetition_penalty=1.2,
             no_repeat_ngram_size=3,
             eos_token_id=tokenizer.eos_token_id,
             pad_token_id=tokenizer.eos_token_id
         )
+
     response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    assistant_response = response.split("Assistant:")[-1].strip()
-    assistant_response = assistant_response.split('\n')[0].strip()
-    chat_history.append((user_input, assistant_response))
-    return chat_history, chat_history
-def restart_chat():
-    return [], []
+    zephyr_response = response.split("Zephyr:")[-1].strip()
+
+    chat_history.append((user_input, zephyr_response))
+
+    return "", chat_history, chat_history
+
 with gr.Blocks() as chat_interface:
-    gr.Markdown("<h1><center>W.AI Chat Nikker xD</center></h1>")
+    gr.Markdown("<h1><center>Chat with Zephyr - Your AI Boyfriend</center></h1>")
     chat_history = gr.State([])
     with gr.Column():
         chatbox = gr.Chatbot()
         with gr.Row():
-            user_input = gr.Textbox(show_label=False, placeholder="Summon Wali Here...")
+            user_input = gr.Textbox(show_label=False, placeholder="Talk to Zephyr here...")
             submit_button = gr.Button("Send")
-            restart_button = gr.Button("Restart")
+
     submit_button.click(
         generate_response,
         inputs=[user_input, chat_history],
-        outputs=[chatbox, chat_history]
+        outputs=[user_input, chatbox, chat_history]  # Clear user input and update chatbox and history
     )
-    restart_button.click(
-        restart_chat,
-        inputs=[],
-        outputs=[chatbox, chat_history]
-    )
-chat_interface.launch(share=True)
+
+chat_interface.launch()
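
A note on the generation settings above: in transformers, `max_length` counts the prompt tokens as well as the new ones, so once the trimmed history approaches the 4096-token context window, a `max_length` of 2048 leaves no room for a reply and generation can stop immediately. Also, `temperature`, `top_k`, and `top_p` only take effect when sampling is enabled. A minimal sketch of the usual adjustment, assuming the same `model`, `tokenizer`, and `inputs` as in app.py; the 512/24 budgets are illustrative, not from the commit:

    # Sketch, not part of the commit: budget only the generated tokens and
    # enable sampling so temperature/top_k/top_p actually apply.
    with torch.no_grad():
        outputs = model.generate(
            inputs.input_ids,
            max_new_tokens=512,    # budget for the reply alone (illustrative value)
            min_new_tokens=24,
            do_sample=True,        # without this, generate() decodes greedily
            temperature=0.7,
            top_k=40,
            top_p=0.9,
            repetition_penalty=1.2,
            no_repeat_ngram_size=3,
            eos_token_id=tokenizer.eos_token_id,
            pad_token_id=tokenizer.eos_token_id,
        )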
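Similarly, the left-trim `prompt_tokens[-max_context_length:]` keeps only the most recent tokens, so the persona block at the top of the prompt is the first thing cut once the chat grows. A sketch of one alternative, using a hypothetical helper not in the commit: it keeps the persona intact and drops the oldest turns instead.

    # Hypothetical helper, not in the commit: preserve the persona header and
    # trim whole turns from the oldest end of the history.
    def build_prompt(initial_prompt, chat_history, user_input, tokenizer, max_context_length=4096):
        header = initial_prompt + "\n"
        tail = f"Human: {user_input}\nZephyr:"
        budget = max_context_length - len(tokenizer.encode(header + tail, add_special_tokens=False))
        turns = []
        # Walk the history newest-first, keeping whole turns while they fit.
        for user_msg, bot_msg in reversed(chat_history):
            turn = ""
            if user_msg is not None:
                turn += f"Human: {user_msg}\n"
            if bot_msg is not None:
                turn += f"Zephyr: {bot_msg}\n"
            cost = len(tokenizer.encode(turn, add_special_tokens=False))
            if cost > budget:
                break
            turns.append(turn)
            budget -= cost
        return header + "".join(reversed(turns)) + tail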
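Finally, the new `outputs=[user_input, chatbox, chat_history]` wiring works because `generate_response` now returns `("", chat_history, chat_history)`: the empty string clears the textbox while the other two values update the chat display and state. If Enter-to-send is wanted as well, Gradio's `Textbox.submit` takes the same arguments as `Button.click`; a one-line sketch that would sit inside the same `gr.Blocks` scope:

    # Sketch: bind the same handler to the textbox's Enter key.
    user_input.submit(
        generate_response,
        inputs=[user_input, chat_history],
        outputs=[user_input, chatbox, chat_history],
    )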