import re import base64 from transformers import AutoModelForCausalLM, AutoTokenizer import gradio as gr from PIL import Image from transformers import Owlv2Processor, Owlv2ForObjectDetection model_id = "IDEA-Research/grounding-dino-base" device = "cuda" if torch.cuda.is_available() else "cpu" processor = AutoProcessor.from_pretrained(model_id) model = AutoModelForZeroShotObjectDetection.from_pretrained(model_id).to(device) def input_image_setup(uploaded_file): """ Encodes the uploaded image file into a base64 string. Parameters: - uploaded_file: File-like object uploaded via Gradio. Returns: - encoded_image (str): Base64 encoded string of the image data. """ if uploaded_file is not None: # Convert the image to bytes and encode in Base64 bytes_data = uploaded_file.tobytes() encoded_image = base64.b64encode(bytes_data).decode("utf-8") return encoded_image else: raise FileNotFoundError("No file uploaded") def format_response(response_text): """ Formats the model response to display each item on a new line as a list. Converts numbered items into HTML `