Kilos1 committed on
Commit
331693b
·
verified ·
1 Parent(s): f957c4c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -33
app.py CHANGED
@@ -1,18 +1,21 @@
1
- import re
2
- import base64
3
- import io
4
  import torch
5
  import gradio as gr
6
  from PIL import Image
7
- from transformers import MllamaForConditionalGeneration, AutoProcessor
8
 
9
  # Load the model and processor
10
- model_id = "meta-llama/Llama-3.2-11B-Vision-Instruct"
11
- model = MllamaForConditionalGeneration.from_pretrained(
 
 
 
12
  model_id,
13
  torch_dtype=torch.bfloat16,
14
- device_map="auto",
15
- )
 
 
 
16
  processor = AutoProcessor.from_pretrained(model_id)
17
 
18
  def generate_model_response(image_file, user_query):
@@ -24,49 +27,38 @@ def generate_model_response(image_file, user_query):
24
  - user_query: The user's question about the image.
25
 
26
  Returns:
27
- - str: The generated response from the model, formatted as HTML.
28
  """
29
  try:
30
  # Load and prepare the image
31
  raw_image = Image.open(image_file).convert("RGB")
32
 
33
- # Prepare input for the model using the processor
34
- conversation = [
35
- {
36
- "role": "user",
37
- "content": [
38
- {"type": "image", "url": "<|image|>"}, # Placeholder for image
39
- {"type": "text", "text": user_query}
40
- ]
41
- }
42
- ]
43
-
44
- # Apply chat template to prepare inputs for the model
45
- inputs = processor.apply_chat_template(conversation, add_generation_prompt=True, tokenize=False)
46
 
47
- # Process the image and text inputs together
48
- inputs = processor(inputs, raw_image, return_tensors="pt").to(model.device)
49
-
50
  # Generate response from the model
51
- outputs = model.generate(**inputs)
52
-
53
- # Decode and format the response
54
- generated_text = processor.decode(outputs[0], skip_special_tokens=True)
55
 
56
- return generated_text
 
 
57
 
58
  except Exception as e:
59
  print(f"Error in generating response: {e}")
60
- return f"<p>An error occurred: {str(e)}</p>"
61
 
62
  # Gradio Interface
63
  iface = gr.Interface(
64
  fn=generate_model_response,
65
  inputs=[
66
  gr.Image(type="file", label="Upload Image"),
67
- gr.Textbox(label="Enter your question", placeholder="How many calories are in this food?")
68
  ],
69
- outputs=gr.HTML(label="Response from Model"),
70
  )
71
 
72
  iface.launch(share=True)
 
 
 
 
1
  import torch
2
  import gradio as gr
3
  from PIL import Image
4
+ from transformers import AutoProcessor, AutoModel
5
 
6
  # Load the model and processor
7
+ model_id = "OpenGVLab/InternVL2_5-78B"
8
+ device = "cuda" if torch.cuda.is_available() else "cpu"
9
+
10
+ # Initialize the model and processor
11
+ model = AutoModel.from_pretrained(
12
  model_id,
13
  torch_dtype=torch.bfloat16,
14
+ low_cpu_mem_usage=True,
15
+ use_flash_attn=True,
16
+ trust_remote_code=True
17
+ ).eval().to(device)
18
+
19
  processor = AutoProcessor.from_pretrained(model_id)
20
 
21
  def generate_model_response(image_file, user_query):
 
27
  - user_query: The user's question about the image.
28
 
29
  Returns:
30
+ - str: The generated response from the model.
31
  """
32
  try:
33
  # Load and prepare the image
34
  raw_image = Image.open(image_file).convert("RGB")
35
 
36
+ # Prepare inputs for the model using the processor
37
+ inputs = processor(
38
+ text=user_query,
39
+ images=raw_image,
40
+ return_tensors="pt"
41
+ ).to(device)
 
 
 
 
 
 
 
42
 
 
 
 
43
  # Generate response from the model
44
+ outputs = model.generate(**inputs, max_new_tokens=50)
 
 
 
45
 
46
+ # Decode and return the response
47
+ response_text = processor.decode(outputs[0], skip_special_tokens=True)
48
+ return response_text
49
 
50
  except Exception as e:
51
  print(f"Error in generating response: {e}")
52
+ return f"An error occurred: {str(e)}"
53
 
54
  # Gradio Interface
55
  iface = gr.Interface(
56
  fn=generate_model_response,
57
  inputs=[
58
  gr.Image(type="file", label="Upload Image"),
59
+ gr.Textbox(label="Enter your question", placeholder="What do you want to know about this image?")
60
  ],
61
+ outputs="text",
62
  )
63
 
64
  iface.launch(share=True)