virendravaishnav committed on
Commit 50cb395 · 1 Parent(s): 4cf03a8

Updated with OCR model and Gradio integration

Files changed (2)
  1. app.py +16 -13
  2. requirements.txt +2 -1
app.py CHANGED
@@ -1,20 +1,27 @@
 import gradio as gr
-from PIL import Image
-from transformers import AutoTokenizer, AutoModelForVision2Seq
+from transformers import AutoTokenizer
+from huggingface_hub import hf_hub_download
 
-# Load the tokenizer and model
+# Import the custom model code dynamically
+import sys
+sys.path.append(hf_hub_download(repo_id="OpenGVLab/InternVL2-1B", filename=""))  # Adjust path
+
+# Load the custom model and tokenizer
 model_name = "OpenGVLab/InternVL2-1B"
 tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
-model = AutoModelForVision2Seq.from_pretrained(model_name, trust_remote_code=True)
 
-# Custom load image function using PIL
-def load_image(image_path):
-    return Image.open(image_path)
+# Import the custom model class from the downloaded files
+from transformers_modules.OpenGVLab.InternVL2-1B.configuration_internvl_chat import InternVLChatConfig
+from transformers_modules.OpenGVLab.InternVL2-1B.modeling_internvl import InternVLForVision2Seq
+
+# Load the model
+config = InternVLChatConfig.from_pretrained(model_name, trust_remote_code=True)
+model = InternVLForVision2Seq.from_pretrained(model_name, config=config, trust_remote_code=True)
 
 # Function to process and describe the image
 def analyze_image(image):
     # Use PIL to load the image
-    img = load_image(image)
+    img = image.convert("RGB")
     # Tokenize the input
     inputs = tokenizer("describe this image", return_tensors="pt")
     # Perform inference
@@ -27,8 +34,4 @@ demo = gr.Interface(
     inputs=gr.Image(type="pil"),  # Upload an image
     outputs="text",  # Output the extracted text
     title="Image Description using OpenGVLab/InternVL2-1B",
-    description="Upload an image and get a description generated by the InternVL2-1B model."
-)
-
-if __name__ == "__main__":
-    demo.launch(share=True)
+    description="Upload…
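Note on the new loading code: the hand-built imports from transformers_modules.OpenGVLab.InternVL2-1B are not importable as written (a Python module path cannot contain a hyphen), and hf_hub_download needs a concrete filename rather than an empty string. The sketch below shows one way the same Gradio app could load the model through the route documented on the OpenGVLab/InternVL2-1B model card (AutoModel with trust_remote_code=True plus the model's chat() method). It is a minimal sketch, not the code in this commit: the 448x448 resize, the ImageNet normalization, the prompt text, and max_new_tokens=256 are simplifying assumptions standing in for the model card's tiling preprocessor and generation settings.

# Sketch only: follows the AutoModel + chat() route from the InternVL2-1B model card;
# the preprocessing is a simplified stand-in for the card's tiling pipeline.
import gradio as gr
import torchvision.transforms as T
from transformers import AutoModel, AutoTokenizer

model_name = "OpenGVLab/InternVL2-1B"

# trust_remote_code resolves the model's own InternVL classes,
# so no manual sys.path or transformers_modules imports are needed.
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
model = AutoModel.from_pretrained(model_name, trust_remote_code=True).eval()
# (On GPU, the model card additionally uses torch_dtype=torch.bfloat16 and .cuda().)

# Assumed preprocessing: a single 448x448 tile with ImageNet normalization.
transform = T.Compose([
    T.Resize((448, 448)),
    T.ToTensor(),
    T.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
])

def analyze_image(image):
    # Gradio passes a PIL image; build a [1, 3, 448, 448] pixel tensor.
    pixel_values = transform(image.convert("RGB")).unsqueeze(0)
    question = "<image>\nDescribe this image."
    # chat() is the conversational entry point exposed by the model's remote code.
    response = model.chat(tokenizer, pixel_values, question,
                          generation_config=dict(max_new_tokens=256))
    return response

demo = gr.Interface(
    fn=analyze_image,
    inputs=gr.Image(type="pil"),
    outputs="text",
    title="Image Description using OpenGVLab/InternVL2-1B",
    description="Upload an image and get a description generated by the InternVL2-1B model.",
)

if __name__ == "__main__":
    demo.launch()

Letting trust_remote_code resolve the custom classes avoids touching sys.path entirely; huggingface_hub is then only needed if other repo files are fetched explicitly.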
requirements.txt CHANGED
@@ -5,4 +5,5 @@ gradio
 datasets
 pytesseract
 Pillow
-lmdeploy
+lmdeploy
+huggingface_hub