Update app.py
app.py
CHANGED
@@ -18,6 +18,9 @@ def run(message, chat_history, max_new_tokens=4056, temperature=3.5, top_p=0.9,
     # Encode the prompt to tensor
     input_ids = tokenizer.encode(prompt, return_tensors='pt')
 
+    # Move input_ids to the same device as the model
+    input_ids = input_ids.to(model.device)
+
     # Generate a response using the model with adjusted parameters
     response_ids = model.generate(
         input_ids,
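This hunk fixes a device-mismatch bug: tokenizer.encode always returns CPU tensors, so if the model's weights live on a GPU, passing the raw input_ids to model.generate raises a RuntimeError about tensors being on different devices. A minimal sketch of the pattern, assuming a standard Transformers causal LM (the gpt2 checkpoint below is an illustrative stand-in, not the model this app actually loads):

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Hypothetical checkpoint for illustration only.
model_name = 'gpt2'
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)
model.to('cuda' if torch.cuda.is_available() else 'cpu')

prompt = 'Hello'
# tokenizer.encode returns a CPU tensor...
input_ids = tokenizer.encode(prompt, return_tensors='pt')
# ...so move it to wherever the model's parameters live before generating.
input_ids = input_ids.to(model.device)
response_ids = model.generate(input_ids, max_new_tokens=20)
print(tokenizer.decode(response_ids[0], skip_special_tokens=True))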
@@ -75,14 +78,9 @@ def generate(message, history_with_input, max_new_tokens, temperature, top_p, to
         raise ValueError
 
     history = history_with_input[:-1]
-    generator = run(message, history, max_new_tokens, temperature, top_p, top_k)
-    try:
-        first_response = next(generator)
-        yield history + [(message, first_response)]
-    except StopIteration:
-        yield history + [(message, '')]
-    for response in generator:
-        yield history + [(message, response)]
+    response = run(message, history, max_new_tokens, temperature, top_p, top_k)
+    yield history + [(message, response)]
+
 
 def process_example(message):
     generator = generate(message, [], 1024, 2.5, 0.95, 900)
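This hunk removes the token-streaming plumbing. The old code treated run as a generator of progressively longer partial responses, priming it with next and yielding one chat-history update per chunk; the new code calls run once and yields a single finished message, which matches the non-streaming model.generate call in the first hunk. A stubbed sketch of the two contracts (the run_streaming and run_blocking helpers are illustrative, not names from app.py):

def run_streaming(message, history):
    # Old contract: yields progressively longer partial responses.
    for partial in ('He', 'Hell', 'Hello!'):
        yield partial

def run_blocking(message, history):
    # New contract: returns one complete response string.
    return 'Hello!'

def generate_old(message, history):
    generator = run_streaming(message, history)
    try:
        first_response = next(generator)
        yield history + [(message, first_response)]
    except StopIteration:
        yield history + [(message, '')]
    for response in generator:
        # One UI update per partial response.
        yield history + [(message, response)]

def generate_new(message, history):
    # Single UI update once the full response is ready.
    response = run_blocking(message, history)
    yield history + [(message, response)]

The trade-off is responsiveness: the chatbot no longer updates token by token, so nothing appears in the UI until the whole response has been generated.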