Update app.py
app.py CHANGED
@@ -17,6 +17,7 @@ from typing import Iterator, List, Optional, Tuple
 import filelock
 import glob
 import json
+import time
 
 from gradio_client.documentation import document, set_documentation_group
 
@@ -51,6 +52,23 @@ HF_TOKEN = os.environ.get("HF_TOKEN", None)
 # ! path where the model is downloaded, either on ./ or persistent disc
 MODEL_PATH = os.environ.get("MODEL_PATH", "./seal-13b-chat-a")
 
+# ! log path
+LOG_PATH = os.environ.get("LOG_PATH", "").strip()
+LOG_FILE = None
+SAVE_LOGS = LOG_PATH is not None and LOG_PATH != ''
+if SAVE_LOGS:
+    if os.path.exists(LOG_PATH):
+        print(f'LOG_PATH exist: {LOG_PATH}')
+    else:
+        LOG_DIR = os.path.dirname(LOG_PATH)
+        os.makedirs(LOG_DIR, exist_ok=True)
+
+# ! get LOG_PATH as aggregated outputs in log
+GET_LOG_CMD = os.environ.get("GET_LOG_CMD", "").strip()
+
+print(f'SAVE_LOGS: {SAVE_LOGS} | {LOG_PATH}')
+print(f'GET_LOG_CMD: {GET_LOG_CMD}')
+
 # ! !! Whether to delete the folder, ONLY SET THIS IF YOU WANT TO DELETE SAVED MODEL ON PERSISTENT DISC
 DELETE_FOLDER = os.environ.get("DELETE_FOLDER", "")
 IS_DELETE_FOLDER = DELETE_FOLDER is not None and os.path.exists(DELETE_FOLDER)
@@ -86,13 +104,18 @@ Internal instructions of how to configure the DEMO
 
 1. Upload SFT model as a model to huggingface: hugginface/models/seal_13b_a
 2. If the model weights is private, set HF_TOKEN=<your private hf token> in https://huggingface.co/spaces/????/?????/settings
-3. space config env: `HF_MODEL_NAME=
+3. space config env: `HF_MODEL_NAME=SeaLLMs/seal-13b-chat-a` or the underlining model
 4. If enable persistent storage: set
     HF_HOME=/data/.huggingface
     MODEL_PATH=/data/.huggingface/seal-13b-chat-a
     if not:
     MODEL_PATH=./seal-13b-chat-a
 
+
+    HF_HOME=/data/.huggingface
+    MODEL_PATH=/data/ckpt/seal-13b-chat-a
+    DELETE_FOLDER=/data/
+
 """
 
 # ==============================
@@ -127,6 +150,7 @@ EOS_TOKEN = '</s>'
 B_INST, E_INST = "[INST]", "[/INST]"
 B_SYS, E_SYS = "<<SYS>>\n", "\n<</SYS>>\n\n"
 
+# TODO: should Hide the system prompt
 SYSTEM_PROMPT_1 = """You are a multilingual, helpful, respectful and honest assistant. Your name is SeaLLM and you are built by DAMO Academy, Alibaba Group. \
 Please always answer as helpfully as possible, while being safe. Your \
 answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure \
@@ -168,12 +192,12 @@ MODEL_TITLE = """
 </div>
 </div>
 """
+# <a href=''><img src='https://img.shields.io/badge/Paper-PDF-red'></a>
 MODEL_DESC = """
 <div style='display:flex; gap: 0.25rem; '>
-<a href=''><img src='https://img.shields.io/badge/Github-Code-success'></a>
+<a href='https://github.com/SeaLLMs/SeaLLMs'><img src='https://img.shields.io/badge/Github-Code-success'></a>
 <a href='https://huggingface.co/spaces/SeaLLMs/SeaLLM-Chat-13b'><img src='https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue'></a>
 <a href='https://huggingface.co/SeaLLMs/SeaLLM-Chat-13b'><img src='https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Model-blue'></a>
-<a href=''><img src='https://img.shields.io/badge/Paper-PDF-red'></a>
 </div>
 <span style="font-size: larger">
 This is <a href="https://huggingface.co/SeaLLMs/SeaLLM-Chat-13b" target="_blank">SeaLLM-13B-Chat</a> - a chatbot assistant optimized for Southeast Asian Languages. It produces helpful responses in English 🇬🇧, Vietnamese 🇻🇳, Indonesian 🇮🇩 and Thai 🇹🇭.
@@ -182,7 +206,7 @@ Explore <a href="https://huggingface.co/SeaLLMs/SeaLLM-Chat-13b" target="_blank"
 <br>
 <span >
 NOTE: The chatbot may produce inaccurate and harmful information about people, places, or facts.
-<
+<span style="color: red">By using our service, you are required to agree to our <a href="https://huggingface.co/SeaLLMs/SeaLLM-Chat-13b/blob/main/LICENSE" target="_blank" style="color: red">SeaLLM Terms Of Use</a>, which include:</span><br>
 <ul>
 <li >
 You must not use our service to generate any harmful, unethical or illegal content that violates locally applicable and international laws or regulations,
@@ -725,6 +749,7 @@ from gradio.events import Dependency, EventListenerMethod
 def _setup_stop_events(
     self, event_triggers: list[EventListenerMethod], event_to_cancel: Dependency
 ) -> None:
+    from gradio.components import State
     event_triggers = event_triggers if isinstance(event_triggers, (list, tuple)) else [event_triggers]
     if self.stop_btn and self.is_generator:
         if self.submit_btn:
@@ -799,6 +824,7 @@ def _setup_stop_events(
 
 # TODO: reconfigure clear button as stop and clear button
 def _setup_events(self) -> None:
+    from gradio.components import State
     has_on = False
     try:
         from gradio.events import Dependency, EventListenerMethod, on
@@ -807,6 +833,14 @@ def _setup_events(self) -> None:
         has_on = False
     submit_fn = self._stream_fn if self.is_generator else self._submit_fn
 
+    def update_time(c_time, chatbot_state):
+        # if chatbot_state is empty, register a new conversaion with the current timestamp
+        assert len(chatbot_state) > 0, f'empty chatbot state'
+        if len(chatbot_state) == 1:
+            assert chatbot_state[-1][-1] is None, f'invalid [[message, None]] , got {chatbot_state}'
+            return gr.Number(value=time.time(), label='current_time', visible=False), chatbot_state
+        else:
+            return c_time, chatbot_state
 
     if has_on:
         # new version
@@ -831,6 +865,13 @@ def _setup_events(self) -> None:
                 api_name=False,
                 queue=False,
             )
+            .then(
+                update_time,
+                [self.additional_inputs[-1], self.chatbot_state],
+                [self.additional_inputs[-1], self.chatbot_state],
+                api_name=False,
+                queue=False,
+            )
             .then(
                 submit_fn,
                 [self.saved_input, self.chatbot_state] + self.additional_inputs,
@@ -912,6 +953,7 @@ def vllm_abort(self: Any):
             continue
         scheduler.free_seq(seq, SequenceStatus.FINISHED_ABORTED)
 
+
 def _vllm_run_engine(self: Any, use_tqdm: bool = False) -> Dict[str, Any]:
     from vllm.outputs import RequestOutput
     # Initialize tqdm.
@@ -1027,10 +1069,6 @@ def block_lang(
     return False
 
 
-def log_responses(history, message, response):
-    pass
-
-
 def safety_check(text, history=None, ) -> Optional[str]:
     """
     Despite our effort in safety tuning and red teaming, our models may still generate harmful or illegal content.
@@ -1052,8 +1090,10 @@ def chat_response_stream_multiturn(
     temperature: float,
     max_tokens: int,
     frequency_penalty: float,
+    current_time: Optional[float] = None,
     system_prompt: Optional[str] = SYSTEM_PROMPT_1
 ) -> str:
+    global LOG_FILE, LOG_PATH
     from vllm import LLM, SamplingParams
     """Build multi turn
     <bos>[INST] B_SYS SytemPrompt E_SYS Prompt [/INST] Answer <eos>
@@ -1075,6 +1115,12 @@ def chat_response_stream_multiturn(
     max_tokens = int(max_tokens)
 
     message = message.strip()
+
+    if message.strip() == GET_LOG_CMD:
+        print_log_file()
+        yield "Finish printed log. Please clear the chatbox now."
+        return
+
     if len(message) == 0:
         raise gr.Error("The message cannot be empty!")
 
@@ -1114,8 +1160,12 @@ def chat_response_stream_multiturn(
         assert len(gen) == 1, f'{gen}'
         item = next(iter(gen.values()))
         cur_out = item.outputs[0].text
-
-
+
+    # TODO: use current_time to register conversations, accoriding history and cur_out
+    history_str = format_conversation(history + [[message, cur_out]])
+    print(f'@@@@@@@@@@\n{history_str}\n##########\n')
+
+    maybe_log_conv_file(current_time, history, message, cur_out, temperature=temperature, frequency_penalty=frequency_penalty)
 
     if cur_out is not None and "\\n" in cur_out:
         print(f'double slash-n in cur_out:\n{cur_out}')
@@ -1128,11 +1178,51 @@ def chat_response_stream_multiturn(
     if message_safety is not None:
         yield message_safety
         return
-
-    if LOG_RESPONSE:
-        log_responses(history, message, cur_out)
 
 
+def maybe_log_conv_file(current_time, history, message, response, **kwargs):
+    global LOG_FILE
+    if LOG_FILE is not None:
+        my_history = history + [[message, response]]
+        obj = {
+            'key': str(current_time),
+            'history': my_history
+        }
+        for k, v in kwargs.items():
+            obj[k] = v
+        log_ = json.dumps(obj, ensure_ascii=False)
+        LOG_FILE.write(log_ + "\n")
+        LOG_FILE.flush()
+        print(f'Wrote {obj["key"]} to {LOG_PATH}')
+
+
+def format_conversation(history):
+    _str = '\n'.join([
+        (
+            f'<<<User>>> {h[0]}\n'
+            f'<<<Asst>>> {h[1]}'
+        )
+        for h in history
+    ])
+    return _str
+
+
+def print_log_file():
+    global LOG_FILE, LOG_PATH
+    if SAVE_LOGS and os.path.exists(LOG_PATH):
+        with open(LOG_PATH, 'r', encoding='utf-8') as f:
+            convos = {}
+            for l in f:
+                if l:
+                    item = json.loads(l)
+                    convos[item['key']] = item
+        print(f'Printing log from {LOG_PATH}')
+        for k, v in convos.items():
+            history = v.pop('history')
+            print(f'######--{v}--##')
+            _str = format_conversation(history)
+            print(_str)
+
 
 def debug_chat_response_echo(
     message: str,
@@ -1140,11 +1230,23 @@ def debug_chat_response_echo(
     temperature: float = 0.0,
     max_tokens: int = 4096,
     frequency_penalty: float = 0.4,
+    current_time: Optional[float] = None,
     system_prompt: str = SYSTEM_PROMPT_1,
 ) -> str:
+    global LOG_FILE
    import time
     time.sleep(0.5)
-
+
+    if message.strip() == GET_LOG_CMD:
+        print_log_file()
+        yield "Finish printed log."
+        return
+
+    for i in range(len(message)):
+        yield f"repeat: {current_time} {message[:i + 1]}"
+
+    cur_out = f"repeat: {current_time} {message}"
+    maybe_log_conv_file(current_time, history, message, cur_out, temperature=temperature, frequency_penalty=frequency_penalty)
 
 
 def check_model_path(model_path) -> str:
@@ -1162,7 +1264,6 @@ def check_model_path(model_path) -> str:
     return ckpt_info
 
 
-
 def maybe_delete_folder():
     if IS_DELETE_FOLDER and DOWNLOAD_SNAPSHOT:
         import shutil
@@ -1184,7 +1285,7 @@ async () => {
 """
 
 def launch():
-    global demo, llm, DEBUG
+    global demo, llm, DEBUG, LOG_FILE
     model_desc = MODEL_DESC
     model_path = MODEL_PATH
     model_title = MODEL_TITLE
@@ -1199,8 +1300,11 @@ def launch():
     ckpt_info = "None"
 
     print(
-        f'Launch config:
+        f'Launch config: '
         f'\n| model_title=`{model_title}` '
+        f'\n| max_tokens={max_tokens} '
+        f'\n| dtype={dtype} '
+        f'\n| tensor_parallel={tensor_parallel} '
         f'\n| BLOCK_LANGS={BLOCK_LANGS} '
         f'\n| IS_DELETE_FOLDER={IS_DELETE_FOLDER} '
         f'\n| STREAM_YIELD_MULTIPLE={STREAM_YIELD_MULTIPLE} '
@@ -1214,6 +1318,8 @@ def launch():
         f'\n| DOWNLOAD_SNAPSHOT={DOWNLOAD_SNAPSHOT} '
        f'\n| gpu_memory_utilization={gpu_memory_utilization} '
         f'\n| KEYWORDS={KEYWORDS} '
+        f'\n| LOG_PATH={LOG_PATH} | SAVE_LOGS={SAVE_LOGS} '
+        f'\n| GET_LOG_CMD={GET_LOG_CMD} '
         f'\n| Sys={SYSTEM_PROMPT_1}'
         f'\n| Desc={model_desc}'
     )
@@ -1222,6 +1328,8 @@ def launch():
         model_desc += "\n<br>!!!!! This is in debug mode, responses will copy original"
         response_fn = debug_chat_response_echo
         print(f'Creating in DEBUG MODE')
+        if SAVE_LOGS:
+            LOG_FILE = open(LOG_PATH, 'a', encoding='utf-8')
     else:
         # ! load the model
         maybe_delete_folder()
@@ -1265,6 +1373,9 @@ def launch():
         response_fn = chat_response_stream_multiturn
         print(F'respond: {response_fn}')
 
+        if SAVE_LOGS:
+            LOG_FILE = open(LOG_PATH, 'a', encoding='utf-8')
+
     demo = gr.ChatInterface(
         response_fn,
         chatbot=ChatBot(
@@ -1286,6 +1397,7 @@ def launch():
             gr.Number(value=temperature, label='Temperature (higher -> more random)'),
             gr.Number(value=max_tokens, label='Max generated tokens (increase if want more generation)'),
             gr.Number(value=frequence_penalty, label='Frequency penalty (> 0 encourage new tokens)'),
+            gr.Number(value=0, label='current_time', visible=False),
            # ! Remove the system prompt textbox to avoid jailbreaking
             # gr.Textbox(value=sys_prompt, label='System prompt', lines=8)
         ],
@@ -1310,5 +1422,4 @@ def main():
 
 
 if __name__ == "__main__":
-    main()
-
+    main()
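For reference, when LOG_PATH is set the Space appends one JSON object per completed response to that file (see maybe_log_conv_file above): each record carries 'key' (the hidden current_time value that identifies a conversation), the full 'history' including the new turn, and the sampling parameters passed as keyword arguments. Below is a minimal offline sketch of reading such a log back, mirroring what print_log_file does inside the Space; the file path and the script itself are illustrative and not part of this commit.

# read_seal_log.py -- illustrative reader for the JSONL conversation log (not part of app.py)
import json

LOG_PATH = "/data/seal_logs/conversations.jsonl"  # assumption: point this at the Space's LOG_PATH

convos = {}
with open(LOG_PATH, "r", encoding="utf-8") as f:
    for line in f:
        if line.strip():
            record = json.loads(line)
            # later records with the same key hold a longer history of the same conversation,
            # so keeping only the last record per key yields the full transcript
            convos[record["key"]] = record

for key, record in convos.items():
    history = record.pop("history")
    print(f"--- conversation {key} | params: {record} ---")
    for user_msg, bot_msg in history:
        print(f"<<<User>>> {user_msg}")
        print(f"<<<Asst>>> {bot_msg}")

Keeping the last record per key is the same de-duplication behaviour print_log_file relies on when it overwrites convos[item['key']] while scanning the file.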