import gradio as gr
import torch
from transformers import AutoModel, AutoProcessor, AutoTokenizer
|
|
|
# Hugging Face Hub id of the checkpoint. The loaders below pass
# trust_remote_code=True, so the repository's custom code is executed locally.
repo_id = "OpenGVLab/InternVL2-1B"

# Choose the device first so the weight dtype can follow it.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# float16 halves memory on GPU, but half-precision inference on CPU is poorly
# supported (some ops are unimplemented or very slow), so use float32 there.
model_dtype = torch.float16 if device.type == "cuda" else torch.float32

tokenizer = AutoTokenizer.from_pretrained(repo_id, trust_remote_code=True)
processor = AutoProcessor.from_pretrained(repo_id, trust_remote_code=True)
model = AutoModel.from_pretrained(
    repo_id, trust_remote_code=True, torch_dtype=model_dtype
)

# Move the weights to the selected device once, at import time.
model.to(device)
|
|
|
def analyze_image(image):
    """Generate a text description for an uploaded image.

    Args:
        image: The PIL image delivered by the Gradio image input. May be
            ``None``, e.g. when the form is submitted without an upload.

    Returns:
        The decoded model output as a string, or a human-readable message
        when no image was supplied or when inference raised an exception.
    """
    # Guard explicitly instead of letting ``None.convert`` surface as a
    # cryptic AttributeError message in the UI.
    if image is None:
        return "Please upload an image first."

    try:
        img = image.convert("RGB")
        inputs = processor(
            images=img, text="describe this image", return_tensors="pt"
        ).to(device)

        # Cast floating-point tensors (e.g. pixel values) to the model's
        # dtype so a half-precision model is not fed float32 inputs. Integer
        # tensors such as token ids are left untouched.
        target_dtype = getattr(model, "dtype", None)
        if target_dtype is not None:
            inputs = {
                key: (
                    value.to(target_dtype)
                    if isinstance(value, torch.Tensor)
                    and value.is_floating_point()
                    else value
                )
                for key, value in inputs.items()
            }

        # NOTE(review): the InternVL2 model card documents
        # model.chat(tokenizer, pixel_values, question, generation_config) as
        # the inference entry point — confirm generate(**inputs) is supported
        # with this processor's output.
        # Inference only: skip autograd bookkeeping to save memory and time.
        with torch.no_grad():
            outputs = model.generate(**inputs)

        return tokenizer.decode(outputs[0], skip_special_tokens=True)
    except Exception as e:
        # UI boundary: report the failure as text instead of crashing the app.
        return f"An error occurred: {str(e)}"
|
|
|
# Gradio front end: a single PIL image goes in, plain text comes out.
_image_input = gr.Image(type="pil")
_app_title = "Image Description using InternVL2-1B"
_app_description = "Upload an image and get a description generated by the InternVL2-1B model."

demo = gr.Interface(
    fn=analyze_image,
    inputs=_image_input,
    outputs="text",
    title=_app_title,
    description=_app_description,
)

# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()