Update app.py
app.py
CHANGED
@@ -18,6 +18,9 @@ def run(message, chat_history, max_new_tokens=4056, temperature=3.5, top_p=0.9,
     # Encode the prompt to tensor
     input_ids = tokenizer.encode(prompt, return_tensors='pt')
 
+    # Move input_ids to the same device as the model
+    input_ids = input_ids.to(model.device)
+
     # Generate a response using the model with adjusted parameters
     response_ids = model.generate(
         input_ids,
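This hunk fixes a device-mismatch bug: tokenizer.encode always returns CPU tensors, so if the model's weights live on a GPU, passing the raw input_ids to model.generate raises a RuntimeError about tensors being on different devices. A minimal sketch of the pattern, assuming a standard Transformers causal LM (the gpt2 checkpoint below is an illustrative stand-in, not the model this app actually loads):

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Hypothetical checkpoint for illustration only.
model_name = 'gpt2'
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)
model.to('cuda' if torch.cuda.is_available() else 'cpu')

prompt = 'Hello'
# tokenizer.encode returns a CPU tensor...
input_ids = tokenizer.encode(prompt, return_tensors='pt')
# ...so move it to wherever the model's parameters live before generating.
input_ids = input_ids.to(model.device)
response_ids = model.generate(input_ids, max_new_tokens=20)
print(tokenizer.decode(response_ids[0], skip_special_tokens=True))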
@@ -75,14 +78,9 @@ def generate(message, history_with_input, max_new_tokens, temperature, top_p, to
         raise ValueError
 
     history = history_with_input[:-1]
-    generator = run(message, history, max_new_tokens, temperature, top_p, top_k)
-    try:
-        first_response = next(generator)
-        yield history + [(message, first_response)]
-    except StopIteration:
-        yield history + [(message, '')]
-    for response in generator:
-        yield history + [(message, response)]
+    response = run(message, history, max_new_tokens, temperature, top_p, top_k)
+    yield history + [(message, response)]
+
 
 def process_example(message):
     generator = generate(message, [], 1024, 2.5, 0.95, 900)
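This hunk removes the token-streaming plumbing. The old code treated run as a generator of progressively longer partial responses, priming it with next and yielding one chat-history update per chunk; the new code calls run once and yields a single finished message, which matches the non-streaming model.generate call in the first hunk. A stubbed sketch of the two contracts (the run_streaming and run_blocking helpers are illustrative, not names from app.py):

def run_streaming(message, history):
    # Old contract: yields progressively longer partial responses.
    for partial in ('He', 'Hell', 'Hello!'):
        yield partial

def run_blocking(message, history):
    # New contract: returns one complete response string.
    return 'Hello!'

def generate_old(message, history):
    generator = run_streaming(message, history)
    try:
        first_response = next(generator)
        yield history + [(message, first_response)]
    except StopIteration:
        yield history + [(message, '')]
    for response in generator:
        # One UI update per partial response.
        yield history + [(message, response)]

def generate_new(message, history):
    # Single UI update once the full response is ready.
    response = run_blocking(message, history)
    yield history + [(message, response)]

The trade-off is responsiveness: the chatbot no longer updates token by token, so nothing appears in the UI until the whole response has been generated.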