Spaces:

michailroussos
/

ID2223_9D_withGPU

Runtime error

App Files Community

michailroussos committed on Dec 9, 2024

Commit

3a645a0

1 Parent(s): 0556c99

Browse files

Files changed (1) hide show

app.py +13 -25

app.py CHANGED Viewed

@@ -16,19 +16,17 @@ FastLanguageModel.for_inference(model)  # Enable optimized inference
 # Define the response function
 def respond(message, history, system_message, max_tokens, temperature, top_p):
-    # Prepare the messages, separating the system message from user/assistant pairs
-    messages = [{"role": "system", "content": system_message}]
-    # Append the conversation history (user-assistant pairs)
     if history:
         for entry in history:
             messages.append({"role": "user", "content": entry["user"]})
             messages.append({"role": "assistant", "content": entry["assistant"]})
-    # Add the user's new message to the list of messages
     messages.append({"role": "user", "content": message})
-    # Tokenize the input
     inputs = tokenizer.apply_chat_template(
         messages,
         tokenize=True,
@@ -37,44 +35,34 @@ def respond(message, history, system_message, max_tokens, temperature, top_p):
     ).to("cuda" if torch.cuda.is_available() else "cpu")
     # Generate the response
-    #attention_mask = inputs.ne(tokenizer.pad_token_id).long()
     generated_tokens = model.generate(
         input_ids=inputs,
-        #attention_mask=attention_mask,
         max_new_tokens=max_tokens,
         use_cache=True,
         temperature=temperature,
         top_p=top_p,
     )
-    response = tokenizer.decode(generated_tokens[0], skip_special_tokens=True)
-    # Clean the response to ensure no system messages are included
-    response = response.replace("Cutting Knowledge Date", "").replace("You are a helpful assistant.", "").strip()
-    # Debug: Print the raw and cleaned assistant response
-    print("Raw Assistant Response:", response)
-    # Update the conversation history with the new user-assistant interaction
     if history is None:
         history = []
     history.append({"user": message, "assistant": response})
-    # Debug: Print updated history
-    print("Updated History:", history)
-    # Format the history into the structure expected by Gradio
     formatted_history = []
     for entry in history:
         formatted_history.append({"role": "user", "content": entry["user"]})
         formatted_history.append({"role": "assistant", "content": entry["assistant"]})
-    # Debug: Print the formatted history
-    print("Formatted History:", formatted_history)
-    # Return the formatted history
     return formatted_history
 # Define the Gradio interface
 demo = gr.ChatInterface(
     fn=respond,

 # Define the response function
 def respond(message, history, system_message, max_tokens, temperature, top_p):
+    # Start by preparing only the conversation history (user-assistant pairs)
+    messages = []
     if history:
         for entry in history:
             messages.append({"role": "user", "content": entry["user"]})
             messages.append({"role": "assistant", "content": entry["assistant"]})
+    # Add the user's new message to the list
     messages.append({"role": "user", "content": message})
+    # Tokenize the input (prepare the data for the model)
     inputs = tokenizer.apply_chat_template(
         messages,
         tokenize=True,
     ).to("cuda" if torch.cuda.is_available() else "cpu")
     # Generate the response
+    attention_mask = inputs.ne(tokenizer.pad_token_id).long()
     generated_tokens = model.generate(
         input_ids=inputs,
+        attention_mask=attention_mask,
         max_new_tokens=max_tokens,
         use_cache=True,
         temperature=temperature,
         top_p=top_p,
     )
+    response = tokenizer.decode(generated_tokens[0], skip_special_tokens=True)
+    # Update the conversation history with the new user-assistant pair
     if history is None:
         history = []
     history.append({"user": message, "assistant": response})
+    # Prepare the history for Gradio
     formatted_history = []
     for entry in history:
         formatted_history.append({"role": "user", "content": entry["user"]})
         formatted_history.append({"role": "assistant", "content": entry["assistant"]})
+    # Return the formatted history for Gradio to display
     return formatted_history
 # Define the Gradio interface
 demo = gr.ChatInterface(
     fn=respond,

more