Hugging Face Space — Running on ZeroGPU
File size: 1,683 Bytes · commit c91d9f3
import os
from functools import lru_cache

import gradio as gr
import spaces  # Import the spaces module (ZeroGPU @spaces.GPU decorator)
import torch
from PIL import Image
from transformers import AutoProcessor, AutoModelForImageTextToText
@lru_cache(maxsize=1)
def load_model():
    """Load the PaliGemma2 processor and model, once per process.

    The result is memoized with ``lru_cache`` so repeated calls (one per
    inference request in ``process_image``) do not re-download or
    re-instantiate the multi-GB model.

    Returns:
        tuple: ``(processor, model)`` for ``google/paligemma2-3b-pt-224``.

    Raises:
        ValueError: if ``HUGGINGFACEHUB_API_TOKEN`` is not set in the
            environment.
    """
    token = os.getenv("HUGGINGFACEHUB_API_TOKEN")  # Retrieve token from environment variable
    if not token:
        raise ValueError(
            "Hugging Face API token not found. Please set it in the environment variables."
        )
    # `token=` replaces the deprecated `use_auth_token=` argument in
    # recent versions of transformers.
    processor = AutoProcessor.from_pretrained(
        "google/paligemma2-3b-pt-224", token=token
    )
    model = AutoModelForImageTextToText.from_pretrained(
        "google/paligemma2-3b-pt-224", token=token
    )
    return processor, model
@spaces.GPU  # Decorate the function that uses the GPU (ZeroGPU allocation)
def process_image(image):
    """Extract text from an image using PaliGemma2.

    Args:
        image: PIL image supplied by the Gradio UI.

    Returns:
        str: the text the model reads out of the image.
    """
    processor, model = load_model()
    # PaliGemma is prompt-driven: its processor requires a text prompt
    # alongside the image. "ocr" instructs the model to transcribe text.
    inputs = processor(images=image, text="ocr", return_tensors="pt")
    prompt_len = inputs["input_ids"].shape[-1]
    # Generate predictions; the default max_new_tokens (20) would truncate
    # longer transcriptions, so allow a more generous budget.
    with torch.no_grad():
        generated_ids = model.generate(**inputs, max_new_tokens=256)
    # Slice off the echoed prompt tokens so only newly generated text
    # (the transcription) is decoded.
    text = processor.batch_decode(
        generated_ids[:, prompt_len:], skip_special_tokens=True
    )[0]
    return text.strip()
if __name__ == "__main__":
    # Assemble the Gradio UI: one image input, one text output, wired to
    # the GPU-decorated inference function.
    demo = gr.Interface(
        fn=process_image,
        inputs=gr.Image(type="pil", label="Upload an image containing text"),
        outputs=gr.Textbox(label="Extracted Text"),
        title="Text Reading from Images using PaliGemma2",
        description="Upload an image containing text and the model will extract the text.",
    )
    demo.launch()