virendravaishnav committed on
Commit
957d892
·
1 Parent(s): 50cb395

Updated with OCR model and Gradio integration

Browse files
Files changed (2) hide show
  1. app.py +31 -14
  2. requirements.txt +1 -2
app.py CHANGED
@@ -1,26 +1,39 @@
1
  import gradio as gr
2
- from transformers import AutoTokenizer
3
  from huggingface_hub import hf_hub_download
 
 
 
 
4
 
5
- # Import the custom model code dynamically
 
 
 
 
 
6
  import sys
7
- sys.path.append(hf_hub_download(repo_id="OpenGVLab/InternVL2-1B", filename="")) # Adjust path
8
 
9
- # Load the custom model and tokenizer
10
- model_name = "OpenGVLab/InternVL2-1B"
11
- tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
 
 
12
 
13
- # Import the custom model class from the downloaded files
14
- from transformers_modules.OpenGVLab.InternVL2-1B.configuration_internvl_chat import InternVLChatConfig
15
- from transformers_modules.OpenGVLab.InternVL2-1B.modeling_internvl import InternVLForVision2Seq
 
 
16
 
17
- # Load the model
18
- config = InternVLChatConfig.from_pretrained(model_name, trust_remote_code=True)
19
- model = InternVLForVision2Seq.from_pretrained(model_name, config=config, trust_remote_code=True)
 
 
20
 
21
  # Function to process and describe the image
22
  def analyze_image(image):
23
- # Use PIL to load the image
24
  img = image.convert("RGB")
25
  # Tokenize the input
26
  inputs = tokenizer("describe this image", return_tensors="pt")
@@ -34,4 +47,8 @@ demo = gr.Interface(
34
  inputs=gr.Image(type="pil"), # Upload an image
35
  outputs="text", # Output the extracted text
36
  title="Image Description using OpenGVLab/InternVL2-1B",
37
- description="Upload​⬤
 
 
 
 
 
1
  import gradio as gr
 
2
  from huggingface_hub import hf_hub_download
3
+ from transformers import AutoTokenizer
4
+
5
+ # Download and import custom model files
6
+ repo_id = "OpenGVLab/InternVL2-1B"
7
 
8
+ # Download custom configuration and model files
9
+ config_file = hf_hub_download(repo_id=repo_id, filename="configuration_internvl_chat.py", local_dir="model_files")
10
+ model_file = hf_hub_download(repo_id=repo_id, filename="modeling_internvl.py", local_dir="model_files")
11
+
12
+ # Load the files dynamically
13
+ import importlib.util
14
  import sys
 
15
 
16
+ # Load the configuration file
17
+ spec_config = importlib.util.spec_from_file_location("InternVLChatConfig", config_file)
18
+ InternVLChatConfig = importlib.util.module_from_spec(spec_config)
19
+ sys.modules["InternVLChatConfig"] = InternVLChatConfig
20
+ spec_config.loader.exec_module(InternVLChatConfig)
21
 
22
+ # Load the model file
23
+ spec_model = importlib.util.spec_from_file_location("InternVLForVision2Seq", model_file)
24
+ InternVLForVision2Seq = importlib.util.module_from_spec(spec_model)
25
+ sys.modules["InternVLForVision2Seq"] = InternVLForVision2Seq
26
+ spec_model.loader.exec_module(InternVLForVision2Seq)
27
 
28
+ # Load the tokenizer and model
29
+ model_name = "OpenGVLab/InternVL2-1B"
30
+ tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
31
+ config = InternVLChatConfig.InternVLChatConfig.from_pretrained(model_name, trust_remote_code=True)
32
+ model = InternVLForVision2Seq.InternVLForVision2Seq.from_pretrained(model_name, config=config, trust_remote_code=True)
33
 
34
  # Function to process and describe the image
35
  def analyze_image(image):
36
+ # Convert PIL image to RGB if needed
37
  img = image.convert("RGB")
38
  # Tokenize the input
39
  inputs = tokenizer("describe this image", return_tensors="pt")
 
47
  inputs=gr.Image(type="pil"), # Upload an image
48
  outputs="text", # Output the extracted text
49
  title="Image Description using OpenGVLab/InternVL2-1B",
50
+ description="Upload an image and get a description generated by the InternVL2-1B model."
51
+ )
52
+
53
+ if __name__ == "__main__":
54
+ demo.launch(share=True)
requirements.txt CHANGED
@@ -1,4 +1,3 @@
1
- huggingface_hub==0.22.2
2
  transformers
3
  torch
4
  gradio
@@ -6,4 +5,4 @@ datasets
6
  pytesseract
7
  Pillow
8
  lmdeploy
9
- huggingface_hub
 
 
1
  transformers
2
  torch
3
  gradio
 
5
  pytesseract
6
  Pillow
7
  lmdeploy
8
+ huggingface_hub