# Hugging Face Space: extract text from images with PaliGemma2 via a Gradio UI.
import os

import gradio as gr
import spaces  # ZeroGPU helper: provides the @spaces.GPU decorator
import torch
from PIL import Image
from transformers import AutoProcessor, AutoModelForImageTextToText
def load_model():
    """Load the PaliGemma2 processor and model (cached after the first call).

    Returns:
        tuple: ``(processor, model)`` for ``google/paligemma2-3b-pt-224``.

    Raises:
        ValueError: if the ``HUGGINGFACEHUB_API_TOKEN`` environment variable
            is not set.
    """
    # Reuse the already-loaded pair: downloading and initializing a 3B-param
    # model on every request (process_image calls this per upload) would be
    # prohibitively slow.
    cached = getattr(load_model, "_cache", None)
    if cached is not None:
        return cached

    token = os.getenv("HUGGINGFACEHUB_API_TOKEN")  # gated model: auth required
    if not token:
        raise ValueError(
            "Hugging Face API token not found. Please set it in the environment variables."
        )

    # `token=` replaces the deprecated `use_auth_token=` argument.
    processor = AutoProcessor.from_pretrained(
        "google/paligemma2-3b-pt-224", token=token
    )
    model = AutoModelForImageTextToText.from_pretrained(
        "google/paligemma2-3b-pt-224", token=token
    )
    load_model._cache = (processor, model)
    return processor, model
# On ZeroGPU Spaces the GPU is only attached to functions decorated with
# @spaces.GPU — the original comment ("Decorate the function that uses the
# GPU") indicates the decorator belongs here.
@spaces.GPU
def process_image(image):
    """Extract text from an image using PaliGemma2.

    Args:
        image: PIL image supplied by the Gradio ``Image`` component.

    Returns:
        str: the text generated by the model for the image.
    """
    processor, model = load_model()
    # Preprocess the image into model-ready tensors.
    # NOTE(review): PaliGemma processors typically also expect a `text`
    # prompt (e.g. "ocr"); confirm this call works without one.
    inputs = processor(images=image, return_tensors="pt")
    # Inference only — no gradients needed.
    with torch.no_grad():
        generated_ids = model.generate(**inputs)
    # Decode the first (and only) sequence in the batch.
    text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
    return text
if __name__ == "__main__":
    # Build the Gradio UI; `process_image` is invoked once per uploaded image.
    iface = gr.Interface(
        fn=process_image,
        inputs=gr.Image(type="pil", label="Upload an image containing text"),
        outputs=gr.Textbox(label="Extracted Text"),
        title="Text Reading from Images using PaliGemma2",
        description="Upload an image containing text and the model will extract the text.",
    )
    # Blocks until the server is stopped.
    iface.launch()