NGUYEN, Xuan Phi
committed
Commit · 43147aa
Parent(s): 2997e80

update
multipurpose_chatbot/demos/multimodal_chat_interface.py CHANGED

@@ -944,8 +944,8 @@ def vision_chat_response_stream_multiturn_engine(
     if num_tokens >= MODEL_ENGINE.max_position_embeddings - 128:
         raise gr.Error(f"Conversation or prompt is too long ({num_tokens} toks), please clear the chatbox or try shorter input.")
 
-    print(f'{image_paths=}')
-    print(full_prompt)
+    # print(f'{image_paths=}')
+    # print(full_prompt)
     outputs = None
     response = None
     num_tokens = -1
@@ -995,7 +995,7 @@ def doc_chat_response_stream_multiturn_engine(
     if num_tokens >= MODEL_ENGINE.max_position_embeddings - 128:
         raise gr.Error(f"Conversation or prompt is too long ({num_tokens} toks), please clear the chatbox or try shorter input.")
 
-    print(full_prompt)
+    # print(full_prompt)
     outputs = None
     response = None
     num_tokens = -1
@@ -1050,8 +1050,8 @@ def vision_doc_chat_response_stream_multiturn_engine(
     if num_tokens >= MODEL_ENGINE.max_position_embeddings - 128:
         raise gr.Error(f"Conversation or prompt is too long ({num_tokens} toks), please clear the chatbox or try shorter input.")
 
-    print(full_prompt)
-    print(f'{image_paths=}')
+    # print(full_prompt)
+    # print(f'{image_paths=}')
     outputs = None
     response = None
     num_tokens = -1
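The change above only silences debug output: the print calls that dumped image_paths and the fully rendered prompt are commented out in all three streaming handlers. If this kind of logging needs to be toggled regularly, gating it behind an environment variable avoids re-editing the file. A minimal sketch; the CHATBOT_DEBUG flag and debug_print helper are hypothetical illustrations, not part of this repo:

import os

# Hypothetical flag, not defined anywhere in this repo.
DEBUG_PROMPTS = os.environ.get("CHATBOT_DEBUG", "0") == "1"

def debug_print(*args, **kwargs):
    # Emit debug output only when CHATBOT_DEBUG=1 is set.
    if DEBUG_PROMPTS:
        print(*args, **kwargs)

# Inside the handlers, instead of commenting prints in and out:
# debug_print(f'{image_paths=}')
# debug_print(full_prompt)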
multipurpose_chatbot/engines/transformers_engine.py CHANGED

@@ -1,8 +1,13 @@
 
 try:
     import spaces
+    def maybe_spaces_gpu(fn):
+        fn = spaces.GPU(fn)
+        return fn
 except ModuleNotFoundError:
     print(f'Cannot import hf `spaces` with `import spaces`.')
+    def maybe_spaces_gpu(fn):
+        return fn
 import os
 import numpy as np
 import argparse
@@ -541,7 +546,7 @@ class TransformersEngine(BaseEngine):
         if message_safety is not None:
             raise gr.Error(message_safety)
 
-    @
+    @maybe_spaces_gpu
     def generate_yield_string(self, prompt, temperature, max_tokens, stop_strings: Optional[Tuple[str]] = None, **kwargs):
 
         # ! MUST PUT INSIDE torch.no_grad() otherwise it will overflow OOM
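The import-time change defines maybe_spaces_gpu as a portable stand-in for the spaces.GPU decorator: on a Hugging Face Space it wraps the function so the call runs with a GPU allocated, and when the spaces package is not installed (e.g. running locally) it returns the function unchanged, which lets the same @maybe_spaces_gpu decorator work in both environments. A self-contained sketch of the pattern, with a dummy generate function standing in for the real GPU-bound code:

try:
    import spaces  # available on Hugging Face Spaces

    def maybe_spaces_gpu(fn):
        # On a ZeroGPU Space, spaces.GPU runs fn with a GPU allocated.
        return spaces.GPU(fn)
except ModuleNotFoundError:
    def maybe_spaces_gpu(fn):
        # Outside Spaces there is nothing to wrap; use fn as-is.
        return fn

@maybe_spaces_gpu
def generate(prompt: str) -> str:
    # Dummy stand-in for the real GPU-bound generation code.
    return prompt.upper()

print(generate("hello"))  # works with or without the spaces package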
@@ -558,6 +563,12 @@ class TransformersEngine(BaseEngine):
 
         with torch.no_grad():
             inputs = self.tokenizer(prompt, return_tensors='pt')
+            # whether to print the full prompts
+            retok_full_prompt = self.tokenizer.decode(inputs.input_ids[0], skip_special_tokens=False)
+            print(f"retok_full_prompt:\n{retok_full_prompt}>>>>")
+            begin_bos = inputs.input_ids[0][0] == self.tokenizer.bos_token_id
+            print(f'begin_bos: {begin_bos}')
+
             num_tokens = inputs.input_ids.size(1)
 
             inputs = inputs.to(self._model.device)
@@ -574,7 +585,7 @@ class TransformersEngine(BaseEngine):
             response = None
             for index, token in enumerate(generator):
                 out_tokens.extend(token.tolist())
-                response = self.tokenizer.decode(out_tokens)
+                response = self.tokenizer.decode(out_tokens, skip_special_tokens=True)
                 if "<|im_start|>assistant\n" in response:
                     response = response.split("<|im_start|>assistant\n")[-1]
                 num_tokens += 1
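These two hunks add debugging aids and fix streamed decoding. Re-decoding inputs.input_ids and printing it shows the exact string the model will see, and begin_bos checks whether the tokenizer prepended a BOS token, a quick guard against a doubled or missing BOS when the prompt template changes. The decode of the streamed tokens now passes skip_special_tokens=True, so tokens the tokenizer registers as special (EOS markers, chat-template tokens) no longer leak into the displayed response. A small sketch of the difference, using an illustrative chat checkpoint; any tokenizer that registers its chat markers as special tokens behaves the same way:

from transformers import AutoTokenizer

# Illustrative checkpoint only; the repo's actual model may differ.
tok = AutoTokenizer.from_pretrained("Qwen/Qwen1.5-0.5B-Chat")

ids = tok("Hello!<|im_end|>")["input_ids"]

# Raw decode keeps the end-of-turn marker in the text.
print(tok.decode(ids, skip_special_tokens=False))  # Hello!<|im_end|>

# skip_special_tokens=True drops every registered special token,
# which is why the streamed response comes out clean.
print(tok.decode(ids, skip_special_tokens=True))   # Hello!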