shuaikang committed
Commit 96a12df · verified
Parent(s): 90bdb08

Update app.py

Files changed (1)
  1. app.py +113 -106
app.py CHANGED
@@ -1,110 +1,117 @@
-
- from transformers import AutoModel, AutoTokenizer
  import gradio as gr
- import mdtex2html
- # from utils import load_model_on_gpus
-
- tokenizer = AutoTokenizer.from_pretrained("sethuiyer/Medichat-Llama3-8B", trust_remote_code=True)
- # model = AutoModel.from_pretrained("sethuiyer/Medichat-Llama3-8B", trust_remote_code=True).cuda()
- model = AutoModel.from_pretrained("sethuiyer/Medichat-Llama3-8B", trust_remote_code=True)
- # Multi-GPU support: use the two lines below instead of the line above, and set num_gpus to your actual number of GPUs
- # from utils import load_model_on_gpus
- # model = load_model_on_gpus("THUDM/chatglm2-6b", num_gpus=2)
- model = model.eval()
-
- """Override Chatbot.postprocess"""
-
-
- def postprocess(self, y):
-     if y is None:
-         return []
-     for i, (message, response) in enumerate(y):
-         y[i] = (
-             None if message is None else mdtex2html.convert(message),
-             None if response is None else mdtex2html.convert(response),
-         )
-     return y
-
-
- gr.Chatbot.postprocess = postprocess
-
-
- def parse_text(text):
-     """copy from https://github.com/GaiZhenbiao/ChuanhuChatGPT/"""
-     lines = text.split("\n")
-     lines = [line for line in lines if line != ""]
-     count = 0
-     for i, line in enumerate(lines):
-         if "```" in line:
-             count += 1
-             items = line.split('`')
-             if count % 2 == 1:
-                 lines[i] = f'<pre><code class="language-{items[-1]}">'
-             else:
-                 lines[i] = f'<br></code></pre>'
-         else:
-             if i > 0:
-                 if count % 2 == 1:
-                     line = line.replace("`", "\`")
-                     line = line.replace("<", "&lt;")
-                     line = line.replace(">", "&gt;")
-                     line = line.replace(" ", "&nbsp;")
-                     line = line.replace("*", "&ast;")
-                     line = line.replace("_", "&lowbar;")
-                     line = line.replace("-", "&#45;")
-                     line = line.replace(".", "&#46;")
-                     line = line.replace("!", "&#33;")
-                     line = line.replace("(", "&#40;")
-                     line = line.replace(")", "&#41;")
-                     line = line.replace("$", "&#36;")
-                 lines[i] = "<br>" + line
-     text = "".join(lines)
-     return text
-
-
- def predict(input, chatbot, max_length, top_p, temperature, history, past_key_values):
-     chatbot.append((parse_text(input), ""))
-     for response, history, past_key_values in model.stream_chat(tokenizer, input, history,
-                                                                 past_key_values=past_key_values,
-                                                                 return_past_key_values=True,
-                                                                 max_length=max_length, top_p=top_p,
-                                                                 temperature=temperature):
-         chatbot[-1] = (parse_text(input), parse_text(response))
-
-         yield chatbot, history, past_key_values
-
-
- def reset_user_input():
-     return gr.update(value='')
-
-
- def reset_state():
-     return [], [], None


  with gr.Blocks() as demo:
-     gr.HTML("""<h1 align="center">ChatGLM2-6B</h1>""")
-
-     chatbot = gr.Chatbot()
-     with gr.Row():
-         with gr.Column(scale=4):
-             with gr.Column(scale=12):
-                 user_input = gr.Textbox(show_label=False, placeholder="Input...", lines=10).style(
-                     container=False)
-             with gr.Column(min_width=32, scale=1):
-                 submitBtn = gr.Button("Submit", variant="primary")
-         with gr.Column(scale=1):
-             emptyBtn = gr.Button("Clear History")
-             max_length = gr.Slider(0, 32768, value=8192, step=1.0, label="Maximum length", interactive=True)
-             top_p = gr.Slider(0, 1, value=0.8, step=0.01, label="Top P", interactive=True)
-             temperature = gr.Slider(0, 1, value=0.95, step=0.01, label="Temperature", interactive=True)
-
-     history = gr.State([])
-     past_key_values = gr.State(None)
-
-     submitBtn.click(predict, [user_input, chatbot, max_length, top_p, temperature, history, past_key_values],
-                     [chatbot, history, past_key_values], show_progress=True)
-     submitBtn.click(reset_user_input, [], [user_input])
-
-     emptyBtn.click(reset_state, outputs=[chatbot, history, past_key_values], show_progress=True)
-
- demo.queue().launch(server_name="0.0.0.0", server_port=7860, share=False, inbrowser=True)
+ import torch
  import gradio as gr
+ from threading import Thread
+ from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
+ device = "cuda"  # the device to load the model onto
+ # device = "cpu"  # the device to load the model onto
+
+
+ bot_avatar = "shuaikang/dl_logo_rect.png"  # path to the chatbot avatar image
+ user_avatar = "shuaikang/user_avatar.jpg"  # path to the user avatar image
+ # model_path = "sethuiyer/Medichat-Llama3-8B"  # path to the downloaded model
+ # model_path = "johnsnowlabs/JSL-MedMX-7X"
+ model_path = "aaditya/Llama3-OpenBioLLM-8B"
+
+ # Global conversation history. Llama 3 supports a system prompt, so one is set by default.
+ llama3_chat_history = [
+     {"role": "system", "content": "You are a helpful assistant trained by MetaAI! But you are running with DataLearnerAI Code."}
+ ]
+
+ # Initialize the globals that will hold the loaded model
+ tokenizer = None
+ streamer = None
+ model = None
+ terminators = None
+
+
+ def init_model():
+     """Initialize the model by loading it from local files."""
+     global tokenizer, model, streamer, terminators
+     tokenizer = AutoTokenizer.from_pretrained(
+         model_path, local_files_only=True)
+
+     model = AutoModelForCausalLM.from_pretrained(
+         model_path,
+         torch_dtype=torch.float16,
+         device_map=device,
+         trust_remote_code=True
+     )
+
+     # Llama 3 marks the end of a turn with <|eot_id|>, so stop on it as well as on eos
+     terminators = [
+         tokenizer.eos_token_id,
+         tokenizer.convert_tokens_to_ids("<|eot_id|>")
+     ]
+
+     # Streams decoded tokens so the UI can render the reply incrementally
+     streamer = TextIteratorStreamer(
+         tokenizer,
+         skip_prompt=True,
+         skip_special_tokens=True
+     )


  with gr.Blocks() as demo:
+     # Step 1: load the model
+     init_model()
+
+     # Step 2: build the Gradio chatbot UI and add its controls
+     chatbot = gr.Chatbot(
+         height=900,
+         avatar_images=(user_avatar, bot_avatar)
+     )
+     msg = gr.Textbox()
+     clear = gr.ClearButton([msg, chatbot])
+
+     # Clear the stored conversation history
+     def clear_history():
+         global llama3_chat_history
+         llama3_chat_history = []
+
+     # Callback that produces the reply
+     def respond(message, chat_history):
+
+         global llama3_chat_history, tokenizer, model, streamer
+
+         llama3_chat_history.append({"role": "user", "content": message})
+
+         # Format the conversation history with Llama 3's built-in chat template
+         history_str = tokenizer.apply_chat_template(
+             llama3_chat_history,
+             tokenize=False,
+             add_generation_prompt=True
+         )
+
+         # Tokenize
+         inputs = tokenizer(history_str, return_tensors='pt').to(device)
+
+         chat_history.append([message, ""])
+
+         generation_kwargs = dict(
+             **inputs,
+             streamer=streamer,
+             max_new_tokens=4096,
+             num_beams=1,
+             do_sample=True,
+             top_p=0.8,
+             temperature=0.3,
+             eos_token_id=terminators
+         )
+
+         # Run generation on a worker thread and watch the streamed output
+         thread = Thread(target=model.generate, kwargs=generation_kwargs)
+         thread.start()
+
+         for new_text in streamer:
+             chat_history[-1][1] += new_text
+             yield "", chat_history
+
+         llama3_chat_history.append(
+             {"role": "assistant", "content": chat_history[-1][1]}
+         )
+
+     # Clicking the clear button also wipes the stored history
+     clear.click(clear_history)
+     msg.submit(respond, [msg, chatbot], [msg, chatbot])
+
+ if __name__ == "__main__":
+     demo.queue(concurrency_count=1, max_size=1).launch(server_name="0.0.0.0", server_port=7860)
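
The respond callback depends on two Llama 3 conventions: the tokenizer's built-in chat template serializes the message history into a prompt string, and the <|eot_id|> end-of-turn token must be added to the stop list. A minimal sketch of that formatting step, assuming the same aaditya/Llama3-OpenBioLLM-8B tokenizer the commit loads (the example messages are purely illustrative):

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("aaditya/Llama3-OpenBioLLM-8B")

history = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "What is hypertension?"},
]

# Renders the history into Llama 3's header-tagged chat format and appends
# the assistant header, so generation continues as the assistant's turn.
prompt = tokenizer.apply_chat_template(
    history, tokenize=False, add_generation_prompt=True
)

# Llama 3 emits <|eot_id|> at the end of each turn, so generation must
# stop on it in addition to the regular eos token.
terminators = [
    tokenizer.eos_token_id,
    tokenizer.convert_tokens_to_ids("<|eot_id|>"),
]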
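The streaming itself follows the standard TextIteratorStreamer pattern: model.generate blocks until completion, so it runs on a worker thread that pushes decoded text into the streamer, while the caller consumes the streamer as an iterator. A standalone sketch of that pattern, using "gpt2" purely as a small stand-in model:

from threading import Thread
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")

inputs = tokenizer("The quick brown fox", return_tensors="pt")
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

# generate() blocks until finished, so it runs on a worker thread while
# the main thread drains the streamer chunk by chunk.
thread = Thread(target=model.generate,
                kwargs=dict(**inputs, streamer=streamer, max_new_tokens=40))
thread.start()

for new_text in streamer:  # yields decoded text fragments as they arrive
    print(new_text, end="", flush=True)
thread.join()

Because respond is a generator that yields the partially built chat_history on every chunk, Gradio re-renders the chatbot on each yield, which is what makes the reply appear incrementally in the UI.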
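One version caveat: queue(concurrency_count=..., max_size=...) is the Gradio 3.x signature; Gradio 4.x removed concurrency_count from queue(). If this Space were run under Gradio 4.x, the closest equivalent would presumably be:

# Gradio 4.x sketch (an assumption; the committed code targets the 3.x API)
demo.queue(max_size=1, default_concurrency_limit=1).launch(server_name="0.0.0.0", server_port=7860)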