Spaces:
Sleeping
Sleeping
Commit · 957d892
1 Parent(s): 50cb395
Updated with OCR model and Gradio integration
Browse files
- app.py +31 -14
- requirements.txt +1 -2
app.py
CHANGED
@@ -1,26 +1,39 @@
|
|
1 |
import gradio as gr
|
2 |
-
from transformers import AutoTokenizer
|
3 |
from huggingface_hub import hf_hub_download
|
|
|
|
|
|
|
|
|
4 |
|
5 |
-
#
|
|
|
|
|
|
|
|
|
|
|
6 |
import sys
|
7 |
-
sys.path.append(hf_hub_download(repo_id="OpenGVLab/InternVL2-1B", filename="")) # Adjust path
|
8 |
|
9 |
-
# Load the
|
10 |
-
|
11 |
-
|
|
|
|
|
12 |
|
13 |
-
#
|
14 |
-
|
15 |
-
|
|
|
|
|
16 |
|
17 |
-
# Load the model
|
18 |
-
|
19 |
-
|
|
|
|
|
20 |
|
21 |
# Function to process and describe the image
|
22 |
def analyze_image(image):
|
23 |
-
#
|
24 |
img = image.convert("RGB")
|
25 |
# Tokenize the input
|
26 |
inputs = tokenizer("describe this image", return_tensors="pt")
|
@@ -34,4 +47,8 @@ demo = gr.Interface(
|
|
34 |
inputs=gr.Image(type="pil"), # Upload an image
|
35 |
outputs="text", # Output the extracted text
|
36 |
title="Image Description using OpenGVLab/InternVL2-1B",
|
37 |
-
description="Upload
|
|
|
|
|
|
|
|
|
|
1 |
import gradio as gr
|
|
|
2 |
from huggingface_hub import hf_hub_download
|
3 |
+
from transformers import AutoTokenizer
|
4 |
+
|
5 |
+
# Download and import custom model files
|
6 |
+
repo_id = "OpenGVLab/InternVL2-1B"
|
7 |
|
8 |
+
# Download custom configuration and model files
|
9 |
+
config_file = hf_hub_download(repo_id=repo_id, filename="configuration_internvl_chat.py", local_dir="model_files")
|
10 |
+
model_file = hf_hub_download(repo_id=repo_id, filename="modeling_internvl.py", local_dir="model_files")
|
11 |
+
|
12 |
+
# Load the files dynamically
|
13 |
+
import importlib.util
|
14 |
import sys
|
|
|
15 |
|
16 |
+
# Load the configuration file
|
17 |
+
spec_config = importlib.util.spec_from_file_location("InternVLChatConfig", config_file)
|
18 |
+
InternVLChatConfig = importlib.util.module_from_spec(spec_config)
|
19 |
+
sys.modules["InternVLChatConfig"] = InternVLChatConfig
|
20 |
+
spec_config.loader.exec_module(InternVLChatConfig)
|
21 |
|
22 |
+
# Load the model file
|
23 |
+
spec_model = importlib.util.spec_from_file_location("InternVLForVision2Seq", model_file)
|
24 |
+
InternVLForVision2Seq = importlib.util.module_from_spec(spec_model)
|
25 |
+
sys.modules["InternVLForVision2Seq"] = InternVLForVision2Seq
|
26 |
+
spec_model.loader.exec_module(InternVLForVision2Seq)
|
27 |
|
28 |
+
# Load the tokenizer and model
|
29 |
+
model_name = "OpenGVLab/InternVL2-1B"
|
30 |
+
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
|
31 |
+
config = InternVLChatConfig.InternVLChatConfig.from_pretrained(model_name, trust_remote_code=True)
|
32 |
+
model = InternVLForVision2Seq.InternVLForVision2Seq.from_pretrained(model_name, config=config, trust_remote_code=True)
|
33 |
|
34 |
# Function to process and describe the image
|
35 |
def analyze_image(image):
|
36 |
+
# Convert PIL image to RGB if needed
|
37 |
img = image.convert("RGB")
|
38 |
# Tokenize the input
|
39 |
inputs = tokenizer("describe this image", return_tensors="pt")
|
|
|
47 |
inputs=gr.Image(type="pil"), # Upload an image
|
48 |
outputs="text", # Output the extracted text
|
49 |
title="Image Description using OpenGVLab/InternVL2-1B",
|
50 |
+
description="Upload an image and get a description generated by the InternVL2-1B model."
|
51 |
+
)
|
52 |
+
|
53 |
+
if __name__ == "__main__":
|
54 |
+
demo.launch(share=True)
|
requirements.txt
CHANGED
@@ -1,4 +1,3 @@
|
|
1 |
-
huggingface_hub==0.22.2
|
2 |
transformers
|
3 |
torch
|
4 |
gradio
|
@@ -6,4 +5,4 @@ datasets
|
|
6 |
pytesseract
|
7 |
Pillow
|
8 |
lmdeploy
|
9 |
-
huggingface_hub
|
|
|
|
|
1 |
transformers
|
2 |
torch
|
3 |
gradio
|
|
|
5 |
pytesseract
|
6 |
Pillow
|
7 |
lmdeploy
|
8 |
+
huggingface_hub
|