File size: 1,220 Bytes
99df19f
c282e28
5426c44
957d892
6dadcd1
 
c282e28
6dadcd1
b966683
a02d815
 
 
b5f436b
5426c44
 
 
 
234718c
a02d815
 
 
 
 
 
 
234718c
05513ce
234718c
6dadcd1
 
 
957d892
 
 
 
b966683
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import gradio as gr
from transformers import AutoTokenizer, AutoProcessor, AutoModel
import torch

repo_id = "OpenGVLab/InternVL2-1B"

# Pick the device first so the model dtype can match it: float16 is only
# reliably supported on CUDA — many CPU kernels lack half-precision
# implementations, so loading fp16 weights on CPU breaks or crawls.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
dtype = torch.float16 if device.type == "cuda" else torch.float32

# Load the tokenizer, processor, and model directly from the Hub.
# NOTE(review): trust_remote_code executes code shipped with the repo;
# acceptable here only because repo_id is pinned to a known publisher.
tokenizer = AutoTokenizer.from_pretrained(repo_id, trust_remote_code=True)
processor = AutoProcessor.from_pretrained(repo_id, trust_remote_code=True)
model = AutoModel.from_pretrained(
    repo_id, trust_remote_code=True, torch_dtype=dtype
)

# Move model to the appropriate device (from_pretrained already leaves the
# model in eval mode, so no explicit model.eval() is needed).
model.to(device)

def analyze_image(image):
    """Generate a text description for an uploaded image.

    Args:
        image: PIL image from the Gradio component, or None when the user
            submits without uploading anything.

    Returns:
        The decoded model caption, or a human-readable error message
        (errors are returned as text so they show up in the Gradio UI
        instead of crashing the request).
    """
    # Gradio passes None when no image was provided; fail with a clear
    # message instead of an AttributeError string from the except block.
    if image is None:
        return "Please upload an image first."
    try:
        img = image.convert("RGB")
        inputs = processor(
            images=img, text="describe this image", return_tensors="pt"
        ).to(device)
        # Inference only: no_grad avoids building an autograd graph.
        # max_new_tokens overrides the model's short default generation
        # length, which would otherwise truncate the description.
        with torch.no_grad():
            outputs = model.generate(**inputs, max_new_tokens=128)
        return tokenizer.decode(outputs[0], skip_special_tokens=True)
    except Exception as e:
        # Broad catch is deliberate: any failure is surfaced in the UI.
        return f"An error occurred: {str(e)}"

# Gradio UI: a single image upload in, the generated caption out.
image_input = gr.Image(type="pil")

demo = gr.Interface(
    fn=analyze_image,
    inputs=image_input,
    outputs="text",
    title="Image Description using InternVL2-1B",
    description="Upload an image and get a description generated by the InternVL2-1B model.",
)

# Start the Gradio server only when run as a script (not on import).
if __name__ == "__main__":
    demo.launch()