File size: 2,104 Bytes
00ab1fc
 
5bf3ded
00ab1fc
f164b34
00ab1fc
5bf3ded
 
 
00ab1fc
 
 
5bf3ded
761c493
00ab1fc
 
 
 
 
 
761c493
 
 
5bf3ded
 
761c493
5bf3ded
 
00ab1fc
 
5bf3ded
00ab1fc
5bf3ded
 
 
00ab1fc
 
 
 
5bf3ded
00ab1fc
761c493
00ab1fc
5bf3ded
00ab1fc
5bf3ded
 
 
 
 
00ab1fc
 
5bf3ded
00ab1fc
761c493
 
 
 
5bf3ded
761c493
 
00ab1fc
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import gradio as gr
from huggingface_hub import InferenceClient
import base64

# Remote inference endpoint for the multimodal MiniCPM-Llama3-V-2.5 (int4) model.
# NOTE(review): text_generation() below streams from this client; whether this
# repo id actually serves the text-generation task is not verifiable from here.
client = InferenceClient("openbmb/MiniCPM-Llama3-V-2_5-int4")

def encode_image(image_path):
    """Return the contents of the file at *image_path* as a base64 UTF-8 string."""
    with open(image_path, "rb") as fh:
        raw = fh.read()
    return base64.b64encode(raw).decode("utf-8")

def respond(
    message,
    image,
    history,
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Stream a chat completion, yielding (partial_response, updated_history).

    Args:
        message: The user's text message.
        image: Optional filepath of an uploaded image (from gr.Image(type="filepath")).
        history: List of (user_msg, bot_msg) tuples from previous turns.
        system_message: System prompt prepended to the conversation.
        max_tokens: Maximum number of new tokens to generate.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling cutoff.

    Yields:
        (response_so_far, history + [(user_message, response_so_far)]) for each
        streamed token.
    """
    messages = [{"role": "system", "content": system_message}]
    for user_msg, bot_msg in history:
        messages.append({"role": "user", "content": user_msg})
        messages.append({"role": "assistant", "content": bot_msg})

    # Build the final user turn up front so the streaming loop below cannot
    # clobber it.  BUG FIX: the original code reused the name `message` as the
    # stream loop variable, shadowing the user's message; the history tuple it
    # yielded then contained the raw stream-chunk object instead of the text.
    user_message = message
    if image:
        # Inline the image as base64 inside <image> tags.
        # NOTE(review): this tag format is assumed by the original code —
        # confirm it matches what the served model expects.
        base64_image = encode_image(image)
        user_message = f"<image>{base64_image}</image>\n{message}"

    messages.append({"role": "user", "content": user_message})

    response = ""
    # NOTE(review): f"{messages}" sends the Python repr of the message list as
    # the raw prompt, not a chat-template-formatted string — preserved as-is
    # since the correct template depends on the served model.
    for chunk in client.text_generation(
        prompt=f"{messages}",
        max_new_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        response += chunk.token.text
        yield response, history + [(user_message, response)]

# Gradio UI: input order must match respond()'s positional parameters
# (message, image, history, system_message, max_tokens, temperature, top_p).
demo = gr.Interface(
    respond,
    inputs=[
        gr.Textbox(label="Message"),
        # type="filepath" means respond() receives a path string, which
        # encode_image() opens directly.
        gr.Image(type="filepath", label="Upload Image"),
        gr.State([]),  # for history
        gr.Textbox(value="You are a friendly AI assistant capable of understanding images and text.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
    ],
    outputs=[
        # respond() is a generator, so the textbox updates as tokens stream in.
        gr.Textbox(label="Response"),
        gr.State()  # for updated history
    ],
    title="MiniCPM-Llama3-V-2_5 Image and Text Chat",
    description="Upload an image and ask questions about it, or just chat without an image.",
    allow_flagging="never"
)

# Launch the Gradio app only when run as a script (not when imported).
if __name__ == "__main__":
    demo.launch()