michailroussos committed on
Commit 6300d69 · 1 Parent(s): 04cf79a
Files changed (1)
  1. app.py +32 -15
app.py CHANGED
@@ -16,23 +16,28 @@ FastLanguageModel.for_inference(model) # Enable optimized inference
 
 # Define the response function
 def respond(message, history, system_message, max_tokens, temperature, top_p):
-    # Print to show the inputs at the start
+    # Print the inputs at the start
+    print("===== Respond Function Called =====")
     print(f"Received message: {message}")
     print(f"Current history: {history}")
 
-    # Prepare the messages for the model: Exclude the system message for now
+    # Prepare the messages for the model
     messages = []
     if history:
+        print("Adding previous messages to the history...")
         for entry in history:
-            print(f"Adding user message to history: {entry['user']}")
-            print(f"Adding assistant message to history: {entry['assistant']}")
+            print(f"User message: {entry['user']}")
+            print(f"Assistant message: {entry['assistant']}")
             messages.append({"role": "user", "content": entry["user"]})
             messages.append({"role": "assistant", "content": entry["assistant"]})
-
-    # Add the user's new message to the list
+
+    # Add the current user message
     print(f"Adding current user message: {message}")
     messages.append({"role": "user", "content": message})
 
+    # Print the messages list before tokenization
+    print("Messages before tokenization:", messages)
+
     # Tokenize the input (prepare the data for the model)
     print("Preparing the input for the model...")
     inputs = tokenizer.apply_chat_template(
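
Note: the hunk cuts off at the opening of the apply_chat_template call, so the remaining arguments are not part of this diff. A minimal sketch of how the call is typically completed with the transformers API (the argument values are assumptions, not taken from this commit), consistent with inputs being a plain tensor as the .ne(...) call below requires:

    # Sketch only (assumed arguments): render the chat messages into a token tensor.
    inputs = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,  # append the assistant-turn marker so the model replies
        return_tensors="pt",         # return a PyTorch tensor, matching inputs.ne(...) below
    ).to(model.device)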
@@ -44,18 +49,23 @@ def respond(message, history, system_message, max_tokens, temperature, top_p):
 
     # Print the tokenized inputs
     print(f"Tokenized inputs: {inputs}")
-
+
     # Generate the response
     attention_mask = inputs.ne(tokenizer.pad_token_id).long()
     print(f"Attention mask: {attention_mask}")
-    generated_tokens = model.generate(
-        input_ids=inputs,
-        attention_mask=attention_mask,
-        max_new_tokens=max_tokens,
-        use_cache=True,
-        temperature=temperature,
-        top_p=top_p,
-    )
+
+    try:
+        generated_tokens = model.generate(
+            input_ids=inputs,
+            attention_mask=attention_mask,
+            max_new_tokens=max_tokens,
+            use_cache=True,
+            temperature=temperature,
+            top_p=top_p,
+        )
+    except Exception as e:
+        print(f"Error during model generation: {e}")
+        return []
 
     # Decode the generated response
     response = tokenizer.decode(generated_tokens[0], skip_special_tokens=True)
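
Note: tokenizer.decode(generated_tokens[0], ...) decodes the prompt together with the completion, so the returned response repeats the whole conversation, including any system text; that is likely what the "system" fallback added in the next hunk is reacting to. A common fix, sketched here under the assumption that inputs is the 2-D tensor produced by apply_chat_template, is to slice off the prompt before decoding. The attention-mask line is also fragile when the tokenizer defines no pad token, since tensor.ne(None) raises a TypeError; falling back to eos_token_id is a common convention.

    # Sketch only: guard against a missing pad token before building the mask.
    pad_id = tokenizer.pad_token_id if tokenizer.pad_token_id is not None else tokenizer.eos_token_id
    attention_mask = inputs.ne(pad_id).long()

    # Sketch only: decode just the newly generated tokens, not the prompt.
    new_tokens = generated_tokens[0][inputs.shape[-1]:]
    response = tokenizer.decode(new_tokens, skip_special_tokens=True)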
@@ -66,7 +76,13 @@ def respond(message, history, system_message, max_tokens, temperature, top_p):
         history = []
     history.append({"user": message, "assistant": response})
 
+    # Check and filter out unwanted system-level messages or metadata
+    if "system" in response.lower():
+        print("System message detected. Replacing with fallback response.")
+        response = "Sorry, something went wrong. Please try again."
+
     # Prepare the history for Gradio: Formatting it correctly
+    print("Formatting history for Gradio...")
     formatted_history = []
     for entry in history:
         print(f"Formatting user message for history: {entry['user']}")
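
Note: what counts as "formatted correctly" depends on the Gradio version. Classic gr.ChatInterface expects history as (user, assistant) pairs, while newer releases accept role/content dicts when built with type="messages". The loop body is truncated by the hunk, so here is a sketch of the pair format this loop presumably produces (an assumption, not visible in the commit):

    # Sketch only: convert the internal dicts into (user, assistant) pairs for Gradio.
    formatted_history = []
    for entry in history:
        formatted_history.append((entry["user"], entry["assistant"]))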
@@ -82,6 +98,7 @@ def respond(message, history, system_message, max_tokens, temperature, top_p):
 
 
 
+
 # Define the Gradio interface
 demo = gr.ChatInterface(
     fn=respond,
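
Note: the diff stops at fn=respond, so the rest of the gr.ChatInterface call is not shown. Since respond takes system_message, max_tokens, temperature, and top_p after the (message, history) pair, it is presumably wired through additional_inputs; a sketch of that wiring (all labels and default values here are assumptions):

    # Sketch only (assumed defaults): expose respond()'s extra parameters as UI controls.
    demo = gr.ChatInterface(
        fn=respond,
        additional_inputs=[
            gr.Textbox(value="You are a helpful assistant.", label="System message"),
            gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
            gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature"),
            gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p"),
        ],
    )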
 