RolmOCR / app.py
redhairedshanks1's picture
Update app.py
44af924 verified
import gradio as gr
from openai import OpenAI
import base64
from PIL import Image
import io
# Shared OpenAI-compatible client pointed at a server on localhost:8000.
# The key is the literal "not-needed" — presumably the local server does
# not validate it; confirm before pointing this at another endpoint.
client = OpenAI(
    api_key="not-needed",
    base_url="http://localhost:8000/v1",
)
def infer(image_np) -> str:
    """Return the text the OCR model extracts from an image.

    The image is PNG-encoded, embedded in a base64 ``data:`` URL, and sent
    with a fixed text instruction to the ``reducto/RolmOCR`` model through
    the module-level ``client``.

    Args:
        image_np: Array accepted by ``PIL.Image.fromarray``, or ``None``.
            ``None`` is presumably what the Gradio image input delivers
            when the user submits without uploading anything.

    Returns:
        The message content of the first choice in the response. An empty
        string is returned when ``image_np`` is ``None`` or when the
        response carries no text content.
    """
    # Nothing to OCR: bail out instead of letting Image.fromarray fail on None.
    if image_np is None:
        return ""

    # Convert NumPy array to PNG bytes
    buffer = io.BytesIO()
    Image.fromarray(image_np).save(buffer, format="PNG")
    b64 = base64.b64encode(buffer.getvalue()).decode()

    # Call the model
    response = client.chat.completions.create(
        model="reducto/RolmOCR",
        messages=[
            {
                "role": "user",
                "content": [
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:image/png;base64,{b64}"},
                    },
                    {
                        "type": "text",
                        "text": "Return the plain text representation of this document.",
                    },
                ],
            }
        ],
    )

    # The content field may be None; normalise so callers always get a str.
    return response.choices[0].message.content or ""
# Build the image-in / text-out interface around infer() and start serving it.
demo = gr.Interface(fn=infer, inputs="image", outputs="text")
demo.launch()