michailroussos committed
Commit 4668547 · Parent(s): f5a59a6
Files changed (1): app.py +11 -9
app.py CHANGED
@@ -17,11 +17,9 @@ FastLanguageModel.for_inference(model)  # Enable optimized inference
 # Define the response function
 def respond(message, history, system_message, max_tokens, temperature, top_p):
     messages = [{"role": "system", "content": system_message}]
-    for user_msg, assistant_msg in history:
-        if user_msg:
-            messages.append({"role": "user", "content": user_msg})
-        if assistant_msg:
-            messages.append({"role": "assistant", "content": assistant_msg})
+    for exchange in history:
+        messages.append({"role": "user", "content": exchange["user"]})
+        messages.append({"role": "assistant", "content": exchange["assistant"]})
     messages.append({"role": "user", "content": message})
 
     inputs = tokenizer.apply_chat_template(
@@ -32,7 +30,6 @@ def respond(message, history, system_message, max_tokens, temperature, top_p):
     ).to("cuda" if torch.cuda.is_available() else "cpu")
 
     attention_mask = inputs.ne(tokenizer.pad_token_id).long()
-
     generated_tokens = model.generate(
         input_ids=inputs,
         attention_mask=attention_mask,
@@ -43,8 +40,13 @@ def respond(message, history, system_message, max_tokens, temperature, top_p):
     )
     response = tokenizer.decode(generated_tokens[0], skip_special_tokens=True)
 
-    history.append((message, response))  # Update history with new exchange
-    return history  # Return the updated history
+    history.append({"user": message, "assistant": response})
+    formatted_history = [
+        {"role": "user", "content": exchange["user"]} if "user" in exchange else
+        {"role": "assistant", "content": exchange["assistant"]}
+        for exchange in history
+    ]
+    return formatted_history
 
 # Define the Gradio interface
 demo = gr.ChatInterface(
@@ -59,4 +61,4 @@ demo = gr.ChatInterface(
 )
 
 if __name__ == "__main__":
-    demo.launch(share=False)  # Set share=False for local testing
+    demo.launch(share=False)  # Use share=False for local testing
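For context, the updated respond() assumes each history entry is a dict with "user" and "assistant" keys, where the old code unpacked (user_msg, assistant_msg) tuples. A minimal sketch of the new message assembly follows; only the loop mirrors the commit, and the sample history, system_message, and message values are invented stand-ins:

# Sketch of the new message assembly in respond(); the sample
# history, system_message, and message values are hypothetical.
history = [{"user": "Hi", "assistant": "Hello! How can I help?"}]
system_message = "You are a helpful assistant."
message = "Summarize our chat."

# Start with the system prompt, replay prior turns, then append the new user message.
messages = [{"role": "system", "content": system_message}]
for exchange in history:
    messages.append({"role": "user", "content": exchange["user"]})
    messages.append({"role": "assistant", "content": exchange["assistant"]})
messages.append({"role": "user", "content": message})

print(messages)  # the role/content list handed to tokenizer.apply_chat_template(...) in app.py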
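On the return path, the commit stores each turn as {"user": ..., "assistant": ...} and converts history to role/content dicts via the formatted_history comprehension. Note that every exchange built this way contains both keys, so the "user" in exchange branch always matches. A sketch that emits both roles per stored exchange (an illustrative variant, not the committed code) would be:

# Illustrative variant, not the committed code: flatten each
# {"user": ..., "assistant": ...} exchange into the two
# role/content messages a chat UI history expects.
history = [{"user": "Hi", "assistant": "Hello! How can I help?"}]

formatted_history = []
for exchange in history:
    formatted_history.append({"role": "user", "content": exchange["user"]})
    formatted_history.append({"role": "assistant", "content": exchange["assistant"]})

print(formatted_history)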