from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig
import torch
import streamlit as st
from openxlab.model import download
from modelscope import snapshot_download
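# Note: the openxlab / modelscope downloaders are only needed for the
# commented-out alternative model sources below; the active path loads
# "internlm/internlm2-math-7b" directly from the Hugging Face Hub.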
import os
# level = os.getenv('level')
level = '7'
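# `level` selects the parameter count used in the ModelScope checkpoint name
# below (e.g. '7' -> internlm2-chat-7b); it is hard-coded here rather than
# read from the environment.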
with st.sidebar:
    st.markdown('[InternLM Math GitHub Page](https://github.com/InternLM/InternLM-Math)')
    max_length = st.slider("max_length", 0, 1024, 512, step=1)
    # system_prompt = st.text_input("System_Prompt", "")
st.title("InternLM2-math-7B")
st.caption("🚀 Powered by Shanghai AI Lab")
# Define the model path
## ModelScope
# model_id = 'Shanghai_AI_Laboratory/internlm2-chat-' + str(level) + 'b'
# model_name_or_path = snapshot_download(model_id, revision='master')
model_name_or_path = "internlm/internlm2-math-7b"
# OpenXLab
# model_repo = "OpenLMLab/internlm2-chat-7b"
# model_name_or_path = download(model_repo=model_repo)
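# Cache the tokenizer and model as a Streamlit resource so they are loaded
# once per process instead of on every script rerun.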
@st.cache_resource
def get_model():
    tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(model_name_or_path, trust_remote_code=True, torch_dtype=torch.bfloat16).cuda()
    model.eval()
    return tokenizer, model
tokenizer, model = get_model()
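# Chat history lives in Streamlit session state as (prompt, response) tuples,
# which is also the history format passed to the model's chat() helper below.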
if "messages" not in st.session_state:
st.session_state["messages"] = []
for msg in st.session_state.messages:
st.chat_message("user").write(msg[0])
st.chat_message("assistant").write(msg[1])
if prompt := st.chat_input():
st.chat_message("user").write(prompt)
response, history = model.chat(tokenizer, prompt, meta_instruction='', history=st.session_state.messages)
st.session_state.messages.append((prompt, response))
st.chat_message("assistant").write(response)
# import os
# os.system("pip uninstall -y gradio")
# os.system("pip install gradio==3.43.0")
# from lmdeploy.serve.gradio.turbomind_coupled import *
# from lmdeploy.messages import TurbomindEngineConfig
# from lmdeploy import ChatTemplateConfig
# chat_template = ChatTemplateConfig(model_name='internlm2-chat-7b', system='', eosys='', meta_instruction='')
# backend_config = TurbomindEngineConfig(model_name='internlm2-chat-7b', max_batch_size=1, cache_max_entry_count=0.05)#, model_format='awq')
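# Note: in lmdeploy, cache_max_entry_count is (roughly) the fraction of GPU
# memory reserved for the k/v cache, so the small value here keeps memory
# usage low at some cost to throughput.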
# model_path = 'internlm/internlm2-math-7b'
# InterFace.async_engine = AsyncEngine(
#     model_path=model_path,
#     backend='turbomind',
#     backend_config=backend_config,
#     chat_template_config=chat_template,
#     tp=1)
# async def reset_local_func(instruction_txtbox: gr.Textbox,
#                            state_chatbot: Sequence, session_id: int):
#     """reset the session.
#     Args:
#         instruction_txtbox (str): user's prompt
#         state_chatbot (Sequence): the chatting history
#         session_id (int): the session id
#     """
#     state_chatbot = []
#     # end the session
#     with InterFace.lock:
#         InterFace.global_session_id += 1
#         session_id = InterFace.global_session_id
#     return (state_chatbot, state_chatbot, gr.Textbox.update(value=''), session_id)
# async def cancel_local_func(state_chatbot: Sequence, cancel_btn: gr.Button,
#                             reset_btn: gr.Button, session_id: int):
#     """stop the session.
#     Args:
#         instruction_txtbox (str): user's prompt
#         state_chatbot (Sequence): the chatting history
#         cancel_btn (gr.Button): the cancel button
#         reset_btn (gr.Button): the reset button
#         session_id (int): the session id
#     """
#     yield (state_chatbot, disable_btn, disable_btn, session_id)
#     InterFace.async_engine.stop_session(session_id)
#     # pytorch backend does not support resume chat history now
#     if InterFace.async_engine.backend == 'pytorch':
#         yield (state_chatbot, disable_btn, enable_btn, session_id)
#     else:
#         with InterFace.lock:
#             InterFace.global_session_id += 1
#             session_id = InterFace.global_session_id
#         messages = []
#         for qa in state_chatbot:
#             messages.append(dict(role='user', content=qa[0]))
#             if qa[1] is not None:
#                 messages.append(dict(role='assistant', content=qa[1]))
#         gen_config = GenerationConfig(max_new_tokens=0)
#         async for out in InterFace.async_engine.generate(messages,
#                                                          session_id,
#                                                          gen_config=gen_config,
#                                                          stream_response=True,
#                                                          sequence_start=True,
#                                                          sequence_end=False):
#             pass
#         yield (state_chatbot, disable_btn, enable_btn, session_id)
# with gr.Blocks(css=CSS, theme=THEME) as demo:
#     state_chatbot = gr.State([])
#     state_session_id = gr.State(0)
#     with gr.Column(elem_id='container'):
#         gr.Markdown('## LMDeploy Playground')
#         gr.Markdown('[InternLM Math GitHub Page](https://github.com/InternLM/InternLM-Math)')
#         chatbot = gr.Chatbot(
#             elem_id='chatbot',
#             label=InterFace.async_engine.engine.model_name)
#         instruction_txtbox = gr.Textbox(
#             placeholder='Please input the instruction',
#             label='Instruction')
#         with gr.Row():
#             cancel_btn = gr.Button(value='Cancel', interactive=False)
#             reset_btn = gr.Button(value='Reset')
#         with gr.Row():
#             request_output_len = gr.Slider(1,
#                                            1024,
#                                            value=512,
#                                            step=1,
#                                            label='Maximum new tokens')
#             top_p = gr.Slider(0.01, 1, value=1.0, step=0.01, label='Top_p')
#             temperature = gr.Slider(0.01,
#                                     1.5,
#                                     value=0.01,
#                                     step=0.01,
#                                     label='Temperature')
#     send_event = instruction_txtbox.submit(chat_stream_local, [
#         instruction_txtbox, state_chatbot, cancel_btn, reset_btn,
#         state_session_id, top_p, temperature, request_output_len
#     ], [state_chatbot, chatbot, cancel_btn, reset_btn])
#     instruction_txtbox.submit(
#         lambda: gr.Textbox.update(value=''),
#         [],
#         [instruction_txtbox],
#     )
#     cancel_btn.click(
#         cancel_local_func,
#         [state_chatbot, cancel_btn, reset_btn, state_session_id],
#         [state_chatbot, cancel_btn, reset_btn, state_session_id],
#         cancels=[send_event])
#     reset_btn.click(reset_local_func,
#                     [instruction_txtbox, state_chatbot, state_session_id],
#                     [state_chatbot, chatbot, instruction_txtbox, state_session_id],
#                     cancels=[send_event])
#     def init():
#         with InterFace.lock:
#             InterFace.global_session_id += 1
#             new_session_id = InterFace.global_session_id
#         return new_session_id
#     demo.load(init, inputs=None, outputs=[state_session_id])
# # demo.queue(concurrency_count=InterFace.async_engine.instance_num,
# #            max_size=100).launch()
# demo.queue(max_size=1000).launch(max_threads=InterFace.async_engine.instance_num)