|
import argparse |
|
import gradio as gr |
|
from openai import OpenAI |
|
import os |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Runtime configuration — every setting is overridable via environment variable.
model_url = os.getenv('MODEL_URL', 'http://localhost:8000/v1')  # OpenAI-compatible API base URL
model_name = os.getenv('MODEL_NAME', 'default-model-name')      # model identifier sent with each request
temperature = float(os.getenv('TEMPERATURE', '0.8'))            # sampling temperature
stop_token_ids = os.getenv('STOP_TOKEN_IDS', '')                # comma-separated token ids, e.g. "1,2"
host = os.getenv('HOST', '0.0.0.0')                             # bind interface for the Gradio server
# BUG FIX: os.getenv returned the int default 8001 when PORT was unset but a
# str when it was set; normalize to int at read time so the type is stable.
port = int(os.getenv('PORT', 8001))

# Local OpenAI-compatible servers (e.g. vLLM) do not check the API key,
# but the client library requires a non-empty value.
openai_api_key = "EMPTY"
openai_api_base = model_url

# Shared client used by predict() for streaming chat completions.
client = OpenAI(
    api_key=openai_api_key,
    base_url=openai_api_base,
)
|
|
|
|
|
|
|
def predict(message, history):
    """Stream an assistant reply for *message* given the chat *history*.

    Parameters
    ----------
    message : str
        The user's latest message.
    history : list[tuple[str, str]]
        Prior (user, assistant) message pairs as supplied by Gradio's
        ChatInterface.

    Yields
    ------
    str
        The accumulated assistant reply so far, so the UI can render the
        response incrementally as chunks arrive.
    """
    # Flatten Gradio's (user, assistant) pairs into OpenAI message format.
    history_openai_format = []
    for human, assistant in history:
        history_openai_format.append({"role": "user", "content": human})
        history_openai_format.append({
            "role": "assistant",
            "content": assistant
        })
    history_openai_format.append({"role": "user", "content": message})

    # BUG FIX: the original referenced an undefined `args` namespace
    # (argparse was imported but never parsed), so every call raised
    # NameError. Use the module-level environment-derived settings instead.
    stream = client.chat.completions.create(
        model=model_name,
        messages=history_openai_format,
        temperature=temperature,
        stream=True,
        extra_body={
            'repetition_penalty': 1,
            # STOP_TOKEN_IDS is a comma-separated string; skip empty
            # entries so a trailing comma or blank value is harmless.
            # (renamed loop var: `id` shadowed the builtin)
            'stop_token_ids': [
                int(token_id.strip())
                for token_id in stop_token_ids.split(',')
                if token_id.strip()
            ] if stop_token_ids else []
        })

    # Accumulate deltas and yield the running total after each chunk.
    partial_message = ""
    for chunk in stream:
        partial_message += (chunk.choices[0].delta.content or "")
        yield partial_message
|
|
|
# Build the UI: a narrow upload column beside the main chat panel.
with gr.Blocks(title="MethodAI 0.15", theme="Soft") as demo:
    with gr.Row():
        with gr.Column(scale=1):
            # NOTE(review): no upload handler is wired to this button, so
            # uploaded PDFs are currently ignored — confirm this is intended.
            gr.UploadButton("Click to upload PDFs",file_types=[".pdf"])
        with gr.Column(scale=4):
            # Chat panel driven by predict(); .queue() enables the
            # streamed (generator) responses predict yields.
            gr.ChatInterface(predict).queue()

# NOTE(review): share=True publishes a public Gradio tunnel URL in addition
# to the local server — verify this exposure is intended for deployment.
demo.launch(server_name=host, server_port=int(port), share=True)
|
|