michailroussos committed on
Commit 6300d69 · 1 Parent(s): 04cf79a
Files changed (1)
  1. app.py +32 -15
app.py CHANGED
@@ -16,23 +16,28 @@ FastLanguageModel.for_inference(model) # Enable optimized inference
 
 # Define the response function
 def respond(message, history, system_message, max_tokens, temperature, top_p):
-    # Print to show the inputs at the start
+    # Print the inputs at the start
+    print("===== Respond Function Called =====")
     print(f"Received message: {message}")
     print(f"Current history: {history}")
 
-    # Prepare the messages for the model: Exclude the system message for now
+    # Prepare the messages for the model
     messages = []
     if history:
+        print("Adding previous messages to the history...")
         for entry in history:
-            print(f"Adding user message to history: {entry['user']}")
-            print(f"Adding assistant message to history: {entry['assistant']}")
+            print(f"User message: {entry['user']}")
+            print(f"Assistant message: {entry['assistant']}")
             messages.append({"role": "user", "content": entry["user"]})
             messages.append({"role": "assistant", "content": entry["assistant"]})
-
-    # Add the user's new message to the list
+
+    # Add the current user message
     print(f"Adding current user message: {message}")
     messages.append({"role": "user", "content": message})
 
+    # Print the messages list before tokenization
+    print("Messages before tokenization:", messages)
+
     # Tokenize the input (prepare the data for the model)
     print("Preparing the input for the model...")
     inputs = tokenizer.apply_chat_template(
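
Note: the hunk cuts off at the opening of the apply_chat_template call, so the remaining arguments are not part of this diff. A minimal sketch of how the call is typically completed with the transformers API (the argument values are assumptions, not taken from this commit), consistent with inputs being a plain tensor as the .ne(...) call below requires:

    # Sketch only (assumed arguments): render the chat messages into a token tensor.
    inputs = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,  # append the assistant-turn marker so the model replies
        return_tensors="pt",         # return a PyTorch tensor, matching inputs.ne(...) below
    ).to(model.device)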
@@ -44,18 +49,23 @@ def respond(message, history, system_message, max_tokens, temperature, top_p):
 
     # Print the tokenized inputs
     print(f"Tokenized inputs: {inputs}")
-
+
     # Generate the response
     attention_mask = inputs.ne(tokenizer.pad_token_id).long()
     print(f"Attention mask: {attention_mask}")
-    generated_tokens = model.generate(
-        input_ids=inputs,
-        attention_mask=attention_mask,
-        max_new_tokens=max_tokens,
-        use_cache=True,
-        temperature=temperature,
-        top_p=top_p,
-    )
+
+    try:
+        generated_tokens = model.generate(
+            input_ids=inputs,
+            attention_mask=attention_mask,
+            max_new_tokens=max_tokens,
+            use_cache=True,
+            temperature=temperature,
+            top_p=top_p,
+        )
+    except Exception as e:
+        print(f"Error during model generation: {e}")
+        return []
 
     # Decode the generated response
     response = tokenizer.decode(generated_tokens[0], skip_special_tokens=True)
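
Note: tokenizer.decode(generated_tokens[0], ...) decodes the prompt together with the completion, so the returned response repeats the whole conversation, including any system text; that is likely what the "system" fallback added in the next hunk is reacting to. A common fix, sketched here under the assumption that inputs is the 2-D tensor produced by apply_chat_template, is to slice off the prompt before decoding. The attention-mask line is also fragile when the tokenizer defines no pad token, since tensor.ne(None) raises a TypeError; falling back to eos_token_id is a common convention.

    # Sketch only: guard against a missing pad token before building the mask.
    pad_id = tokenizer.pad_token_id if tokenizer.pad_token_id is not None else tokenizer.eos_token_id
    attention_mask = inputs.ne(pad_id).long()

    # Sketch only: decode just the newly generated tokens, not the prompt.
    new_tokens = generated_tokens[0][inputs.shape[-1]:]
    response = tokenizer.decode(new_tokens, skip_special_tokens=True)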
@@ -66,7 +76,13 @@ def respond(message, history, system_message, max_tokens, temperature, top_p):
         history = []
     history.append({"user": message, "assistant": response})
 
+    # Check and filter out unwanted system-level messages or metadata
+    if "system" in response.lower():
+        print("System message detected. Replacing with fallback response.")
+        response = "Sorry, something went wrong. Please try again."
+
     # Prepare the history for Gradio: Formatting it correctly
+    print("Formatting history for Gradio...")
     formatted_history = []
     for entry in history:
         print(f"Formatting user message for history: {entry['user']}")
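
Note: what counts as "formatted correctly" depends on the Gradio version. Classic gr.ChatInterface expects history as (user, assistant) pairs, while newer releases accept role/content dicts when built with type="messages". The loop body is truncated by the hunk, so here is a sketch of the pair format this loop presumably produces (an assumption, not visible in the commit):

    # Sketch only: convert the internal dicts into (user, assistant) pairs for Gradio.
    formatted_history = []
    for entry in history:
        formatted_history.append((entry["user"], entry["assistant"]))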
@@ -82,6 +98,7 @@ def respond(message, history, system_message, max_tokens, temperature, top_p):
 
 
 
+
 # Define the Gradio interface
 demo = gr.ChatInterface(
     fn=respond,
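
Note: the diff stops at fn=respond, so the rest of the gr.ChatInterface call is not shown. Since respond takes system_message, max_tokens, temperature, and top_p after the (message, history) pair, it is presumably wired through additional_inputs; a sketch of that wiring (all labels and default values here are assumptions):

    # Sketch only (assumed defaults): expose respond()'s extra parameters as UI controls.
    demo = gr.ChatInterface(
        fn=respond,
        additional_inputs=[
            gr.Textbox(value="You are a helpful assistant.", label="System message"),
            gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
            gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature"),
            gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p"),
        ],
    )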
 