Spaces:

virendravaishnav
/

po-fetch-detail

Sleeping

App Files Community

virendravaishnav committed on Sep 12, 2024

Commit

957d892

1 Parent(s): 50cb395

Updated with OCR model and Gradio integration

Browse files

Files changed (2) hide show

app.py +31 -14
requirements.txt +1 -2

app.py CHANGED Viewed

@@ -1,26 +1,39 @@
 import gradio as gr
-from transformers import AutoTokenizer
 from huggingface_hub import hf_hub_download
-# Import the custom model code dynamically
 import sys
-sys.path.append(hf_hub_download(repo_id="OpenGVLab/InternVL2-1B", filename=""))  # Adjust path
-# Load the custom model and tokenizer
-model_name = "OpenGVLab/InternVL2-1B"
-tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
-# Import the custom model class from the downloaded files
-from transformers_modules.OpenGVLab.InternVL2-1B.configuration_internvl_chat import InternVLChatConfig
-from transformers_modules.OpenGVLab.InternVL2-1B.modeling_internvl import InternVLForVision2Seq
-# Load the model
-config = InternVLChatConfig.from_pretrained(model_name, trust_remote_code=True)
-model = InternVLForVision2Seq.from_pretrained(model_name, config=config, trust_remote_code=True)
 # Function to process and describe the image
 def analyze_image(image):
-    # Use PIL to load the image
     img = image.convert("RGB")
     # Tokenize the input
     inputs = tokenizer("describe this image", return_tensors="pt")
@@ -34,4 +47,8 @@ demo = gr.Interface(
     inputs=gr.Image(type="pil"),  # Upload an image
     outputs="text",  # Output the extracted text
     title="Image Description using OpenGVLab/InternVL2-1B",
-    description="Upload⬤

 import gradio as gr
 from huggingface_hub import hf_hub_download
+from transformers import AutoTokenizer
+# Download and import custom model files
+repo_id = "OpenGVLab/InternVL2-1B"
+# Download custom configuration and model files
+config_file = hf_hub_download(repo_id=repo_id, filename="configuration_internvl_chat.py", local_dir="model_files")
+model_file = hf_hub_download(repo_id=repo_id, filename="modeling_internvl.py", local_dir="model_files")
+# Load the files dynamically
+import importlib.util
 import sys
+# Load the configuration file
+spec_config = importlib.util.spec_from_file_location("InternVLChatConfig", config_file)
+InternVLChatConfig = importlib.util.module_from_spec(spec_config)
+sys.modules["InternVLChatConfig"] = InternVLChatConfig
+spec_config.loader.exec_module(InternVLChatConfig)
+# Load the model file
+spec_model = importlib.util.spec_from_file_location("InternVLForVision2Seq", model_file)
+InternVLForVision2Seq = importlib.util.module_from_spec(spec_model)
+sys.modules["InternVLForVision2Seq"] = InternVLForVision2Seq
+spec_model.loader.exec_module(InternVLForVision2Seq)
+# Load the tokenizer and model
+model_name = "OpenGVLab/InternVL2-1B"
+tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+config = InternVLChatConfig.InternVLChatConfig.from_pretrained(model_name, trust_remote_code=True)
+model = InternVLForVision2Seq.InternVLForVision2Seq.from_pretrained(model_name, config=config, trust_remote_code=True)
 # Function to process and describe the image
 def analyze_image(image):
+    # Convert PIL image to RGB if needed
     img = image.convert("RGB")
     # Tokenize the input
     inputs = tokenizer("describe this image", return_tensors="pt")
     inputs=gr.Image(type="pil"),  # Upload an image
     outputs="text",  # Output the extracted text
     title="Image Description using OpenGVLab/InternVL2-1B",
+    description="Upload an image and get a description generated by the InternVL2-1B model."
+)
+if __name__ == "__main__":
+    demo.launch(share=True)

requirements.txt CHANGED Viewed

@@ -1,4 +1,3 @@
-huggingface_hub==0.22.2
 transformers
 torch
 gradio
@@ -6,4 +5,4 @@ datasets
 pytesseract
 Pillow
 lmdeploy
-huggingface_hub

 transformers
 torch
 gradio
 pytesseract
 Pillow
 lmdeploy
+huggingface_hub