Spaces: Runtime error
import re
import io
import base64

import gradio as gr
from PIL import Image
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import Owlv2Processor, Owlv2ForObjectDetection

# OWLv2 zero-shot object detector (loaded here; see the illustrative sketch below).
detection_processor = Owlv2Processor.from_pretrained("google/owlv2-large-patch14-finetuned")
detection_model = Owlv2ForObjectDetection.from_pretrained("google/owlv2-large-patch14-finetuned")

# Text model used by generate_model_response(). The original file never instantiated
# a tokenizer or causal LM, so the checkpoint below is a placeholder assumption;
# swap in whichever instruction-tuned model the Space is meant to use.
LLM_CHECKPOINT = "Qwen/Qwen2.5-0.5B-Instruct"  # placeholder, not from the original code
tokenizer = AutoTokenizer.from_pretrained(LLM_CHECKPOINT)
model = AutoModelForCausalLM.from_pretrained(LLM_CHECKPOINT)
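# The OWLv2 objects above are never exercised by the rest of this file. The sketch
# below shows how they would typically be used for zero-shot detection; the function
# name, example labels, and the 0.3 threshold are illustrative choices, not part of
# the original Space.
import torch

def detect_objects(image, text_labels):
    """Run zero-shot detection on a PIL image for a list of free-text labels."""
    inputs = detection_processor(text=[text_labels], images=image, return_tensors="pt")
    with torch.no_grad():
        outputs = detection_model(**inputs)
    # Rescale predicted boxes to the original image size (height, width).
    target_sizes = torch.tensor([image.size[::-1]])
    results = detection_processor.post_process_object_detection(
        outputs=outputs, threshold=0.3, target_sizes=target_sizes
    )[0]
    # "boxes", "scores", and "labels" (indices into text_labels) for one image.
    return results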
def input_image_setup(uploaded_file):
    """
    Encodes the uploaded image into a base64 string.

    Parameters:
    - uploaded_file: PIL.Image.Image provided by the Gradio Image component.

    Returns:
    - encoded_image (str): Base64-encoded string of the image data (PNG).
    """
    if uploaded_file is not None:
        # Serialize the PIL image to PNG bytes in memory, then base64-encode it.
        buffer = io.BytesIO()
        uploaded_file.save(buffer, format="PNG")
        encoded_image = base64.b64encode(buffer.getvalue()).decode("utf-8")
        return encoded_image
    else:
        raise FileNotFoundError("No file uploaded")
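# For reference, the base64 string produced above can be turned back into a PIL
# image as shown below. This helper is an illustrative addition (it is not called
# anywhere in the app) demonstrating how a downstream multimodal model or the
# OWLv2 detector could recover the image from the encoded payload.
def decode_base64_image(encoded_image):
    """Decode a base64 string (as returned by input_image_setup) into a PIL image."""
    image_bytes = base64.b64decode(encoded_image)
    return Image.open(io.BytesIO(image_bytes)).convert("RGB")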
def generate_model_response(encoded_image, user_query, assistant_prompt="You are a helpful assistant. Answer the following user query in 1 or 2 sentences: "):
    """
    Sends a query (and, nominally, an image) to the model and returns its answer.

    Parameters:
    - encoded_image (str): Base64-encoded image string.
    - user_query (str): The user's question about the image.
    - assistant_prompt (str): Optional prompt prefix to guide the model's response.

    Returns:
    - str: The model's response for the given query.

    Note: the causal language model used here is text-only, so encoded_image is
    accepted for interface compatibility but is not passed to the model.
    """
    # Prepare the prompt for the model
    input_text = assistant_prompt + user_query + "\n"
    # Tokenize the prompt
    inputs = tokenizer(input_text, return_tensors="pt")
    # Generate a response; cap the length so the default 20-token limit doesn't truncate answers
    outputs = model.generate(**inputs, max_new_tokens=128)
    # Decode only the newly generated tokens, skipping the prompt
    response_text = tokenizer.decode(outputs[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True)
    return response_text
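# Example call (illustrative; assumes the placeholder checkpoint above loaded):
#   answer = generate_model_response(encoded_image, "What is shown in this image?")
# A genuinely image-aware answer would require a vision-language model rather than
# the text-only causal LM used here.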
def process_image_and_query(uploaded_file, user_query):
    """
    Process the uploaded image and user query to generate a response from the model.

    Parameters:
    - uploaded_file: The uploaded image (PIL.Image.Image from the Gradio component).
    - user_query: The user's question about the image.

    Returns:
    - str: The generated response from the model.
    """
    # Encode the uploaded image
    encoded_image = input_image_setup(uploaded_file)
    # Generate a response using the encoded image and user query
    response = generate_model_response(encoded_image, user_query)
    return response
# Create the Gradio interface
iface = gr.Interface(
    fn=process_image_and_query,
    inputs=[
        # gr.inputs.* was removed in Gradio 4; use the top-level components instead.
        gr.Image(type="pil", label="Upload Image"),
        gr.Textbox(label="User Query", placeholder="Enter your question about the image..."),
    ],
    outputs="text",
)

# Launch the Gradio app
iface.launch()
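# When running locally rather than on Hugging Face Spaces, a temporary public URL
# can be requested with iface.launch(share=True); on Spaces the plain launch()
# above is sufficient.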