Spaces:

HARISH20205
/

verbisense

Runtime error

File size: 1,729 Bytes

c8c7a9e

import easyocr
import requests
import io
from PIL import Image
from typing import List, Dict, Any
import os
import numpy as np
from gradio_client import Client


# Lazily-created EasyOCR reader shared by every call. Constructing a Reader is
# expensive (per the EasyOCR documentation it loads the models into memory),
# so it is built once on first use instead of once per image.
_OCR_READER = None


def _get_ocr_reader():
    """Return the shared English EasyOCR reader, creating it on first use."""
    global _OCR_READER
    if _OCR_READER is None:
        _OCR_READER = easyocr.Reader(['en'])
    return _OCR_READER


def _image_result(image_url: str, text: str) -> List[Dict[str, Any]]:
    """Wrap ``text`` in the single-entry result list returned for an image."""
    return [{
        "file_name": os.path.basename(image_url),
        "text": text,
    }]


def process_image_file(image_url: str, timeout: float = 30.0) -> List[Dict[str, Any]]:
    """Download an image and describe it as text via OCR or a caption.

    The image is fetched from ``image_url`` and run through EasyOCR. When OCR
    finds fewer than five whitespace-separated words, the image is assumed to
    be mostly non-textual and a caption is requested from the
    ``HARISH20205/blip-image-caption`` Gradio app instead.

    Args:
        image_url: HTTP(S) URL of the image to process.
        timeout: Seconds to wait for the download before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled server.

    Returns:
        A one-element list holding a dict with two keys: ``"file_name"`` (the
        basename of ``image_url``) and ``"text"``. ``"text"`` is one of:
        ``"Image Data:\\n<ocr text>"``, ``"\\nImage Caption:\\n<caption>"``,
        or ``"Failed to retrieve image."`` when the download fails, the
        server answers with a non-200 status, or the payload cannot be
        decoded as an image.

    Raises:
        Errors raised by EasyOCR or by the Gradio client are not caught here
        and propagate to the caller.
    """
    failure_text = "Failed to retrieve image."

    # Connection errors, timeouts and malformed URLs are reported the same
    # way as a non-200 answer rather than crashing the caller.
    try:
        response = requests.get(image_url, timeout=timeout)
    except requests.RequestException:
        return _image_result(image_url, failure_text)

    if response.status_code != 200:
        return _image_result(image_url, failure_text)

    # Pillow signals unreadable or truncated image data with OSError
    # subclasses; a 200 response carrying a non-image body ends up here.
    try:
        with Image.open(io.BytesIO(response.content)) as image:
            # Normalise to RGB before building the array: palette, bilevel
            # and CMYK images would otherwise yield index/bool/4-channel
            # arrays that do not represent the visible pixels.
            image_np = np.array(image.convert("RGB"))
    except OSError:
        return _image_result(image_url, failure_text)

    result = _get_ocr_reader().readtext(image_np)

    print("*" * 50 + image_url)

    # Each detection is indexed as (bbox, text, confidence); keep only text.
    extracted_text = "\n".join(detection[1] for detection in result)

    if len(extracted_text.split()) < 5:
        # Too little text to be useful: fall back to the BLIP caption model.
        client = Client("HARISH20205/blip-image-caption")
        caption_result = client.predict(image_url=image_url, api_name="/predict")
        return _image_result(image_url, "\nImage Caption:\n" + str(caption_result))

    return _image_result(image_url, "Image Data:\n" + extracted_text)