michailroussos committed
Commit 3a645a0 · 1 Parent(s): 0556c99
Files changed (1):
  app.py (+13 -25)
app.py CHANGED
@@ -16,19 +16,17 @@ FastLanguageModel.for_inference(model) # Enable optimized inference
 
 # Define the response function
 def respond(message, history, system_message, max_tokens, temperature, top_p):
-    # Prepare the messages, separating the system message from user/assistant pairs
-    messages = [{"role": "system", "content": system_message}]
-
-    # Append the conversation history (user-assistant pairs)
+    # Start by preparing only the conversation history (user-assistant pairs)
+    messages = []
     if history:
         for entry in history:
             messages.append({"role": "user", "content": entry["user"]})
             messages.append({"role": "assistant", "content": entry["assistant"]})
-
-    # Add the user's new message to the list of messages
+
+    # Add the user's new message to the list
     messages.append({"role": "user", "content": message})
 
-    # Tokenize the input
+    # Tokenize the input (prepare the data for the model)
     inputs = tokenizer.apply_chat_template(
         messages,
         tokenize=True,
@@ -37,44 +35,34 @@ def respond(message, history, system_message, max_tokens, temperature, top_p):
     ).to("cuda" if torch.cuda.is_available() else "cpu")
 
     # Generate the response
-    #attention_mask = inputs.ne(tokenizer.pad_token_id).long()
+    attention_mask = inputs.ne(tokenizer.pad_token_id).long()
     generated_tokens = model.generate(
         input_ids=inputs,
-        #attention_mask=attention_mask,
+        attention_mask=attention_mask,
         max_new_tokens=max_tokens,
         use_cache=True,
         temperature=temperature,
         top_p=top_p,
     )
-    response = tokenizer.decode(generated_tokens[0], skip_special_tokens=True)
-
-    # Clean the response to ensure no system messages are included
-    response = response.replace("Cutting Knowledge Date", "").replace("You are a helpful assistant.", "").strip()
-
-    # Debug: Print the raw and cleaned assistant response
-    print("Raw Assistant Response:", response)
 
-    # Update the conversation history with the new user-assistant interaction
+    response = tokenizer.decode(generated_tokens[0], skip_special_tokens=True)
+
+    # Update the conversation history with the new user-assistant pair
     if history is None:
         history = []
     history.append({"user": message, "assistant": response})
 
-    # Debug: Print updated history
-    print("Updated History:", history)
-
-    # Format the history into the structure expected by Gradio
+    # Prepare the history for Gradio
     formatted_history = []
     for entry in history:
         formatted_history.append({"role": "user", "content": entry["user"]})
         formatted_history.append({"role": "assistant", "content": entry["assistant"]})
 
-    # Debug: Print the formatted history
-    print("Formatted History:", formatted_history)
-
-    # Return the formatted history
+    # Return the formatted history for Gradio to display
    return formatted_history
 
 
+
 # Define the Gradio interface
 demo = gr.ChatInterface(
     fn=respond,
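For context: the first hunk header shows that respond() sits just below FastLanguageModel.for_inference(model), so the top of app.py presumably loads an Unsloth model roughly like the sketch below. The model name and load options are assumptions for illustration, not part of this commit.

    import torch
    import gradio as gr
    from unsloth import FastLanguageModel

    # Hypothetical setup; only for_inference() is confirmed by the hunk context
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name="michailroussos/some-finetuned-model",  # placeholder name
        max_seq_length=2048,
        load_in_4bit=True,
    )
    FastLanguageModel.for_inference(model)  # Enable optimized inference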
 
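The substantive fix in the second hunk is un-commenting the attention mask. A minimal sketch of what inputs.ne(tokenizer.pad_token_id).long() produces, with made-up token IDs and 0 standing in for the pad ID:

    import torch

    pad_token_id = 0  # assumption for illustration; the real value comes from the tokenizer
    input_ids = torch.tensor([[12, 345, 6789, 0, 0]])  # prompt with two trailing pad tokens

    # 1 where the token is real input, 0 where it is padding
    attention_mask = input_ids.ne(pad_token_id).long()
    print(attention_mask)  # tensor([[1, 1, 1, 0, 0]])

Passing the mask to model.generate keeps the model from attending to padded positions. One caveat: some checkpoints ship with tokenizer.pad_token_id set to None, in which case it must be assigned (commonly tokenizer.eos_token_id) before this line runs.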
 
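The old code's string scrubbing (stripping "Cutting Knowledge Date" and "You are a helpful assistant.") hints at a separate issue that this commit leaves open: model.generate returns the prompt tokens followed by the new tokens, so decoding generated_tokens[0] whole reproduces the chat-template text inside response. A common pattern, shown here as a sketch with dummy tensors rather than as part of this commit, is to slice off the prompt before decoding:

    import torch

    # Stand-ins for the real tensors; the IDs are made up for illustration
    inputs = torch.tensor([[101, 2009, 2003]])                       # prompt: 3 tokens
    generated_tokens = torch.tensor([[101, 2009, 2003, 7592, 999]])  # prompt + 2 new tokens

    # Keep only the newly generated tail, then decode just that
    new_token_ids = generated_tokens[0][inputs.shape[-1]:]
    print(new_token_ids)  # tensor([7592, 999]), only the new tokens
    # response = tokenizer.decode(new_token_ids, skip_special_tokens=True)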
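Finally, the tail of respond() reshapes the internal {"user": ..., "assistant": ...} pairs into the role/content dicts that gr.ChatInterface renders when configured for message-style histories (an assumption about how this app's interface is set up). A self-contained run of that conversion with dummy data:

    # Dummy history with one completed exchange
    history = [{"user": "Hi there", "assistant": "Hello! How can I help?"}]

    formatted_history = []
    for entry in history:
        formatted_history.append({"role": "user", "content": entry["user"]})
        formatted_history.append({"role": "assistant", "content": entry["assistant"]})

    print(formatted_history)
    # [{'role': 'user', 'content': 'Hi there'},
    #  {'role': 'assistant', 'content': 'Hello! How can I help?'}]  (output wrapped)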