Spaces:
Runtime error
Runtime error
| import os | |
| import re | |
| import subprocess | |
| import numpy as np | |
| from PIL import Image | |
| import gradio as gr | |
| import torch | |
| from transformers import AutoProcessor, AutoModelForCausalLM | |
# Checkpoint served by this app. trust_remote_code=True is passed to both
# loaders so code shipped inside the model repository is allowed to run.
model_name = "PJMixers-Images/Florence-2-base-Castollux-v0.5"
model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True)
model.eval()
processor = AutoProcessor.from_pretrained(model_name, trust_remote_code=True)

# Run on CUDA when torch reports it as available, otherwise fall back to CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model.to(device)

# Markdown heading for the page, linking to the model on the Hugging Face hub.
TITLE = f"# [{model_name}](https://huggingface.co/{model_name})"
def process_image(image):
    """
    Generate a caption for a single image.

    Args:
        image: The picture to caption, given as a file path (str), a numpy
            array, or a PIL Image. ``None`` (no picture supplied) is
            reported as an error message instead of being processed.

    Returns:
        The caption decoded from the model output with the ``<s>``, ``</s>``
        and ``<pad>`` markers removed and surrounding whitespace stripped.
        On any failure, a string of the form
        ``"Error processing image: <reason>"`` is returned instead of
        raising, so the caller always receives text to display.
    """
    # Guard first: without it a missing image surfaces as a confusing
    # "'NoneType' object has no attribute 'mode'" message.
    if image is None:
        return "Error processing image: no image provided"

    try:
        # Normalise the input to a PIL image.
        if isinstance(image, np.ndarray):
            image = Image.fromarray(image)
        elif isinstance(image, str):
            # Image.open reads lazily and keeps the file open; converting
            # inside the with-block loads the pixels and then releases the
            # file handle.
            with Image.open(image) as opened:
                image = opened.convert("RGB")

        if image.mode != "RGB":
            image = image.convert("RGB")

        # Build the model inputs for the "<CAPTION>" task prompt.
        inputs = processor(text="<CAPTION>", images=image, return_tensors="pt")

        # Move every tensor to the device the model lives on.
        inputs = {k: v.to(device) for k, v in inputs.items()}

        # Inference only: no gradients are needed.
        with torch.no_grad():
            generated_ids = model.generate(
                input_ids=inputs["input_ids"],
                pixel_values=inputs["pixel_values"],
                max_new_tokens=1024,
                num_beams=5,
                do_sample=True,
            )

        # Special tokens are kept by the decoder and then the three known
        # markers are removed by hand, in the same order as before.
        caption = processor.batch_decode(
            generated_ids,
            skip_special_tokens=False,
        )[0]
        for marker in ("</s>", "<s>", "<pad>"):
            caption = caption.replace(marker, "")
        return caption.strip()
    except Exception as e:
        # Deliberately broad: this is the UI boundary, and the error is
        # reported as text rather than propagated.
        return f"Error processing image: {e}"
# Custom CSS for the output box. The rule is keyed on the element id "output",
# so the output component below must carry elem_id="output" for it to apply.
css = """
#output { height: 500px; overflow: auto; border: 1px solid #ccc; }
"""

# NOTE(review): the nesting below was reconstructed from the call structure
# (the pasted source had lost its indentation) - confirm the intended layout.
with gr.Blocks(css=css) as demo:
    gr.Markdown(TITLE)
    with gr.Tab(label="Single Image Processing"):
        with gr.Row():
            # Left column: picture input and the button that triggers captioning.
            with gr.Column():
                input_img = gr.Image(label="Input Picture")
                submit_btn = gr.Button(value="Submit")
            # Right column: generated caption. elem_id ties this component to
            # the "#output" CSS rule, which previously matched nothing.
            with gr.Column():
                output_text = gr.Textbox(label="Output Text", elem_id="output")

        # Clickable sample pictures; selecting one fills the image input.
        gr.Examples(
            [
                ["eval_img_1.jpg"],
                ["eval_img_2.jpg"],
                ["eval_img_3.jpg"],
                ["eval_img_4.jpg"],
                ["eval_img_5.jpg"],
                ["eval_img_6.jpg"],
                ["eval_img_7.png"],
                ["eval_img_8.jpg"],
            ],
            inputs=[input_img],
            outputs=[output_text],
            fn=process_image,
            label="Try captioning on below examples",
        )

        # Submit runs process_image on the current picture and shows the result.
        submit_btn.click(process_image, [input_img], [output_text])

if __name__ == "__main__":
    demo.launch(debug=True)