File size: 1,803 Bytes
612c5f5
d7dbc2c
2c4f69d
539d19c
d7dbc2c
612c5f5
539d19c
 
 
 
 
612c5f5
 
539d19c
 
 
 
 
612c5f5
d7dbc2c
612c5f5
d7dbc2c
612c5f5
d7dbc2c
 
612c5f5
d7dbc2c
 
 
539d19c
d7dbc2c
612c5f5
 
 
 
539d19c
 
612c5f5
 
 
 
539d19c
 
612c5f5
539d19c
d7dbc2c
612c5f5
 
539d19c
d7dbc2c
612c5f5
d7dbc2c
612c5f5
d7dbc2c
612c5f5
539d19c
d7dbc2c
539d19c
d7dbc2c
 
612c5f5
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
import torch
import gradio as gr
from PIL import Image
from transformers import AutoProcessor, AutoModel

# Load the model and processor
model_id = "OpenGVLab/InternVL2_5-78B"
device = "cuda" if torch.cuda.is_available() else "cpu"

# Initialize the model and processor.
# Flash-attention needs a CUDA GPU, so only request it when one is actually
# available; on the CPU fallback the model's default attention is used.
model = AutoModel.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    low_cpu_mem_usage=True,
    use_flash_attn=(device == "cuda"),
    trust_remote_code=True,
).eval().to(device)

# The model above is loaded with trust_remote_code=True, so allow the same
# for the processor/tokenizer files that live in the same repository.
# NOTE(review): the InternVL model card appears to document a
# `model.chat(tokenizer, pixel_values, question, generation_config)` API
# rather than an AutoProcessor workflow -- confirm that this checkpoint
# really provides a processor accepting `images=` and `text=`.
processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)

def generate_model_response(image_file, user_query):
    """
    Generate the model's answer to a question about an uploaded image.

    Parameters:
    - image_file: The uploaded image, given as anything ``Image.open``
      accepts (a filesystem path or an open file-like object). ``None``
      means no image was provided.
    - user_query: The user's question about the image.

    Returns:
    - str: The decoded model output; a prompt asking for an image when
      ``image_file`` is ``None``; or an "An error occurred: ..." message
      if anything fails while handling the request.
    """
    # A form submitted without an image yields None here; answer with a
    # clear prompt instead of surfacing a confusing low-level error.
    if image_file is None:
        return "Please upload an image."

    try:
        # Load the image. The context manager releases the underlying file
        # handle as soon as the independent RGB copy has been created.
        with Image.open(image_file) as opened_image:
            raw_image = opened_image.convert("RGB")

        # Prepare inputs for the model and move them to the model's device.
        inputs = processor(images=raw_image, text=user_query, return_tensors="pt").to(device)

        # Generate, then decode the first (only) sequence of the batch.
        outputs = model.generate(**inputs)
        return processor.decode(outputs[0], skip_special_tokens=True)

    except Exception as e:
        # UI boundary: log the failure and report it to the user as text
        # rather than letting the exception propagate into the web app.
        print(f"Error in generating response: {e}")
        return f"An error occurred: {str(e)}"

# Gradio Interface
# The image input uses type="filepath": the callback receives the path of a
# temporary copy of the upload, which PIL's Image.open can read directly.
# The previous value, type="file", is deprecated and is rejected by newer
# Gradio releases (accepted values are "numpy", "pil" and "filepath").
iface = gr.Interface(
    fn=generate_model_response,
    inputs=[
        gr.Image(type="filepath", label="Upload Image"),
        gr.Textbox(label="Enter your question", placeholder="What do you want to know about this image?"),
    ],
    outputs="text",
)

# Start the web UI; share=True additionally requests a public share link.
iface.launch(share=True)