Commit 6dadcd1
Parent(s): e8cad36

Updated with OCR model and Gradio integration

Files changed:
- app.py (+22 -11)
- requirements.txt (+1 -0)
app.py CHANGED

@@ -1,27 +1,38 @@
 import gradio as gr
-from transformers import …
+from transformers import AutoTokenizer
+from huggingface_hub import snapshot_download
+import sys
+import os
 
-# …
-…
-…
-…
+# Download the model snapshot
+repo_id = "OpenGVLab/InternVL2-1B"
+model_dir = snapshot_download(repo_id)
+
+# Add the model directory to the Python path for dynamic imports
+sys.path.append(model_dir)
+
+# Import the custom configuration and model classes
+from configuration_internvl_chat import InternVLChatConfig
+from modeling_internvl_chat import InternVLForVision2Seq
+
+# Load the tokenizer and model
+tokenizer = AutoTokenizer.from_pretrained(repo_id, trust_remote_code=True)
+config = InternVLChatConfig.from_pretrained(repo_id, trust_remote_code=True)
+model = InternVLForVision2Seq.from_pretrained(repo_id, config=config, trust_remote_code=True)
 
 # Function to process and describe the image
 def analyze_image(image):
-    # Convert PIL image to RGB if needed
     img = image.convert("RGB")
-    # Tokenize the input
     inputs = tokenizer("describe this image", return_tensors="pt")
-    # Perform inference
     outputs = model.generate(**inputs)
     return tokenizer.decode(outputs[0], skip_special_tokens=True)
 
 # Gradio interface for image input
 demo = gr.Interface(
     fn=analyze_image,
-    inputs=gr.Image(type="pil"),
-    outputs="text",
-    title="Image Description using …
+    inputs=gr.Image(type="pil"),
+    outputs="text",
+    title="Image Description using InternVL2-1B",
     description="Upload an image and get a description generated by the InternVL2-1B model."
 )
 
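As committed, analyze_image converts the uploaded PIL image to RGB but never passes it to the model: only the text prompt is tokenized, so model.generate sees no image, and app.py never calls demo.launch(), which a Gradio Space normally needs at the end of the script. Below is a rough sketch of how the image could actually reach InternVL2-1B, assuming the chat() helper, the <image> prompt placeholder, the 448x448 resize, and the ImageNet normalization described in the InternVL2 model cards; none of those details come from this commit and should be checked against the repo's remote code.

import torch
import torchvision.transforms as T  # extra dependency, not in the committed requirements.txt
import gradio as gr
from transformers import AutoModel, AutoTokenizer

repo_id = "OpenGVLab/InternVL2-1B"
device = "cuda" if torch.cuda.is_available() else "cpu"
dtype = torch.bfloat16 if device == "cuda" else torch.float32

# Load tokenizer and model through the repo's remote code (model-card style loading)
tokenizer = AutoTokenizer.from_pretrained(repo_id, trust_remote_code=True)
model = AutoModel.from_pretrained(repo_id, torch_dtype=dtype, trust_remote_code=True).to(device).eval()

# Single 448x448 tile with ImageNet statistics (values assumed from the model card)
preprocess = T.Compose([
    T.Resize((448, 448)),
    T.ToTensor(),
    T.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
])

def analyze_image(image):
    # Shape (1, 3, 448, 448): one tile in the dtype the model was loaded with
    pixel_values = preprocess(image.convert("RGB")).unsqueeze(0).to(device=device, dtype=dtype)
    # chat() is provided by the repo's remote code; signature taken from the model card
    return model.chat(tokenizer, pixel_values,
                      "<image>\nDescribe this image.",
                      dict(max_new_tokens=256, do_sample=False))

demo = gr.Interface(fn=analyze_image, inputs=gr.Image(type="pil"), outputs="text")
demo.launch()

Loading through AutoModel with trust_remote_code avoids hard-coding the remote-code class names that the commit imports by hand.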
requirements.txt CHANGED

@@ -1,4 +1,5 @@
 transformers==4.31.0
+huggingface_hub==0.16.4
 gradio==3.28.3
 torch>=1.9
 Pillow==9.4.0
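The new huggingface_hub pin backs the snapshot_download call added in app.py. A minimal sketch of what that call returns, and why app.py appends the result to sys.path; the printed cache path varies by machine, and the listed .py files are the remote-code modules the app then imports:

import os
from huggingface_hub import snapshot_download

# Download the repo (or reuse the local cache) and return its directory
model_dir = snapshot_download("OpenGVLab/InternVL2-1B")
print(model_dir)

# Remote-code modules such as configuration_internvl_chat.py live in this directory,
# which is why app.py does sys.path.append(model_dir) before importing them
print(sorted(name for name in os.listdir(model_dir) if name.endswith(".py")))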