michailroussos committed
Commit 4668547 · Parent(s): f5a59a6
Files changed (1): app.py +11 -9
app.py CHANGED
@@ -17,11 +17,9 @@ FastLanguageModel.for_inference(model)  # Enable optimized inference
 # Define the response function
 def respond(message, history, system_message, max_tokens, temperature, top_p):
     messages = [{"role": "system", "content": system_message}]
-    for user_msg, assistant_msg in history:
-        if user_msg:
-            messages.append({"role": "user", "content": user_msg})
-        if assistant_msg:
-            messages.append({"role": "assistant", "content": assistant_msg})
+    for exchange in history:
+        messages.append({"role": "user", "content": exchange["user"]})
+        messages.append({"role": "assistant", "content": exchange["assistant"]})
     messages.append({"role": "user", "content": message})
 
     inputs = tokenizer.apply_chat_template(
@@ -32,7 +30,6 @@ def respond(message, history, system_message, max_tokens, temperature, top_p):
     ).to("cuda" if torch.cuda.is_available() else "cpu")
 
     attention_mask = inputs.ne(tokenizer.pad_token_id).long()
-
     generated_tokens = model.generate(
         input_ids=inputs,
         attention_mask=attention_mask,
@@ -43,8 +40,13 @@ def respond(message, history, system_message, max_tokens, temperature, top_p):
     )
     response = tokenizer.decode(generated_tokens[0], skip_special_tokens=True)
 
-    history.append((message, response))  # Update history with new exchange
-    return history  # Return the updated history
+    history.append({"user": message, "assistant": response})
+    formatted_history = [
+        {"role": "user", "content": exchange["user"]} if "user" in exchange else
+        {"role": "assistant", "content": exchange["assistant"]}
+        for exchange in history
+    ]
+    return formatted_history
 
 # Define the Gradio interface
 demo = gr.ChatInterface(
@@ -59,4 +61,4 @@ demo = gr.ChatInterface(
 )
 
 if __name__ == "__main__":
-    demo.launch(share=False)  # Set share=False for local testing
+    demo.launch(share=False)  # Use share=False for local testing
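For context, the updated respond() assumes each history entry is a dict with "user" and "assistant" keys, where the old code unpacked (user_msg, assistant_msg) tuples. A minimal sketch of the new message assembly follows; only the loop mirrors the commit, and the sample history, system_message, and message values are invented stand-ins:

# Sketch of the new message assembly in respond(); the sample
# history, system_message, and message values are hypothetical.
history = [{"user": "Hi", "assistant": "Hello! How can I help?"}]
system_message = "You are a helpful assistant."
message = "Summarize our chat."

# Start with the system prompt, replay prior turns, then append the new user message.
messages = [{"role": "system", "content": system_message}]
for exchange in history:
    messages.append({"role": "user", "content": exchange["user"]})
    messages.append({"role": "assistant", "content": exchange["assistant"]})
messages.append({"role": "user", "content": message})

print(messages)  # the role/content list handed to tokenizer.apply_chat_template(...) in app.py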
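On the return path, the commit stores each turn as {"user": ..., "assistant": ...} and converts history to role/content dicts via the formatted_history comprehension. Note that every exchange built this way contains both keys, so the "user" in exchange branch always matches. A sketch that emits both roles per stored exchange (an illustrative variant, not the committed code) would be:

# Illustrative variant, not the committed code: flatten each
# {"user": ..., "assistant": ...} exchange into the two
# role/content messages a chat UI history expects.
history = [{"user": "Hi", "assistant": "Hello! How can I help?"}]

formatted_history = []
for exchange in history:
    formatted_history.append({"role": "user", "content": exchange["user"]})
    formatted_history.append({"role": "assistant", "content": exchange["assistant"]})

print(formatted_history)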