virendravaishnav committed on
Commit
6dadcd1
·
1 Parent(s): e8cad36

Updated with OCR model and Gradio integration

Browse files
Files changed (2) hide show
  1. app.py +22 -11
  2. requirements.txt +1 -0
app.py CHANGED
@@ -1,27 +1,38 @@
1
  import gradio as gr
2
- from transformers import AutoModelForVision2Seq, AutoTokenizer
 
 
 
3
 
4
- # Load the Hugging Face model and tokenizer, trusting remote code
5
- model_name = "OpenGVLab/InternVL2-1B"
6
- tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
7
- model = AutoModelForVision2Seq.from_pretrained(model_name, trust_remote_code=True)
 
 
 
 
 
 
 
 
 
 
 
8
 
9
  # Function to process and describe the image
10
  def analyze_image(image):
11
- # Convert PIL image to RGB if needed
12
  img = image.convert("RGB")
13
- # Tokenize the input
14
  inputs = tokenizer("describe this image", return_tensors="pt")
15
- # Perform inference
16
  outputs = model.generate(**inputs)
17
  return tokenizer.decode(outputs[0], skip_special_tokens=True)
18
 
19
  # Gradio interface for image input
20
  demo = gr.Interface(
21
  fn=analyze_image,
22
- inputs=gr.Image(type="pil"), # Upload an image
23
- outputs="text", # Output the extracted text
24
- title="Image Description using OpenGVLab/InternVL2-1B",
25
  description="Upload an image and get a description generated by the InternVL2-1B model."
26
  )
27
 
 
1
  import gradio as gr
2
+ from transformers import AutoTokenizer
3
+ from huggingface_hub import snapshot_download
4
+ import sys
5
+ import os
6
 
7
+ # Download the model snapshot
8
+ repo_id = "OpenGVLab/InternVL2-1B"
9
+ model_dir = snapshot_download(repo_id)
10
+
11
+ # Add the model directory to the Python path for dynamic imports
12
+ sys.path.append(model_dir)
13
+
14
+ # Import the custom configuration and model classes
15
+ from configuration_internvl_chat import InternVLChatConfig
16
+ from modeling_internvl_chat import InternVLForVision2Seq
17
+
18
+ # Load the tokenizer and model
19
+ tokenizer = AutoTokenizer.from_pretrained(repo_id, trust_remote_code=True)
20
+ config = InternVLChatConfig.from_pretrained(repo_id, trust_remote_code=True)
21
+ model = InternVLForVision2Seq.from_pretrained(repo_id, config=config, trust_remote_code=True)
22
 
23
  # Function to process and describe the image
24
  def analyze_image(image):
 
25
  img = image.convert("RGB")
 
26
  inputs = tokenizer("describe this image", return_tensors="pt")
 
27
  outputs = model.generate(**inputs)
28
  return tokenizer.decode(outputs[0], skip_special_tokens=True)
29
 
30
  # Gradio interface for image input
31
  demo = gr.Interface(
32
  fn=analyze_image,
33
+ inputs=gr.Image(type="pil"),
34
+ outputs="text",
35
+ title="Image Description using InternVL2-1B",
36
  description="Upload an image and get a description generated by the InternVL2-1B model."
37
  )
38
 
requirements.txt CHANGED
@@ -1,4 +1,5 @@
1
  transformers==4.31.0
 
2
  gradio==3.28.3
3
  torch>=1.9
4
  Pillow==9.4.0
 
1
  transformers==4.31.0
2
+ huggingface_hub==0.16.4
3
  gradio==3.28.3
4
  torch>=1.9
5
  Pillow==9.4.0