Spaces:

virendravaishnav
/

po-fetch-detail

Sleeping

virendravaishnav committed on Sep 11, 2024

Commit

be080e8

1 Parent(s): 234718c

Updated with OCR model and Gradio integration

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,20 +1,19 @@
 import gradio as gr
-from lmdeploy import pipeline, TurbomindEngineConfig
-from lmdeploy.vl import load_image
-# Model configuration
-model = 'OpenGVLab/InternVL2-1B'
-# Configure the pipeline for the model
-pipe = pipeline(model, backend_config=TurbomindEngineConfig(session_len=8192))
-# Function to process and describe the image
 def analyze_image(image):
-    # Convert PIL image to the format the model expects
-    img = load_image(image)  # `load_image` can handle both URLs and PIL images
-    # Run inference on the uploaded image
-    response = pipe(('describe this image', img))
-    return response.text
 # Gradio interface for image input
 demo = gr.Interface(

 import gradio as gr
+from transformers import AutoProcessor, AutoModelForVision2Seq
+from PIL import Image
+# Load processor and model
+model_name = "OpenGVLab/InternVL2-1B"
+processor = AutoProcessor.from_pretrained(model_name, trust_remote_code=True)
+model = AutoModelForVision2Seq.from_pretrained(model_name, trust_remote_code=True)
+# Function to process the image
 def analyze_image(image):
+    # Prepare image for the model
+    inputs = processor(images=image, return_tensors="pt")
+    # Generate output
+    outputs = model.generate(**inputs)
+    return processor.decode(outputs[0])
 # Gradio interface for image input
 demo = gr.Interface(