michailroussos committed on
Commit
37a21af
·
1 Parent(s): f9e2c2e

small changes

Browse files
Files changed (1) hide show
  1. app.py +4 -3
app.py CHANGED
@@ -34,7 +34,8 @@ def respond(message, history, system_message, max_tokens, temperature, top_p):
34
  tokenize=True,
35
  add_generation_prompt=True, # Required for generation
36
  return_tensors="pt",
37
- ).to("cuda" if torch.cuda.is_available() else "cpu")
 
38
 
39
  # Initialize a TextStreamer for streaming output
40
  text_streamer = TextStreamer(tokenizer, skip_prompt=True)
@@ -42,7 +43,7 @@ def respond(message, history, system_message, max_tokens, temperature, top_p):
42
  # Generate the model's response
43
  response = ""
44
  for token_ids in model.generate(
45
- input_ids=inputs.input_ids,
46
  streamer=text_streamer,
47
  max_new_tokens=max_tokens,
48
  use_cache=True,
@@ -66,4 +67,4 @@ demo = gr.ChatInterface(
66
  )
67
 
68
  if __name__ == "__main__":
69
- demo.launch()
 
34
  tokenize=True,
35
  add_generation_prompt=True, # Required for generation
36
  return_tensors="pt",
37
+ )
38
+ input_ids = inputs.to("cuda" if torch.cuda.is_available() else "cpu")
39
 
40
  # Initialize a TextStreamer for streaming output
41
  text_streamer = TextStreamer(tokenizer, skip_prompt=True)
 
43
  # Generate the model's response
44
  response = ""
45
  for token_ids in model.generate(
46
+ input_ids=input_ids, # Use the tensor directly
47
  streamer=text_streamer,
48
  max_new_tokens=max_tokens,
49
  use_cache=True,
 
67
  )
68
 
69
  if __name__ == "__main__":
70
+ demo.launch(share=True)