Spaces:

virendravaishnav
/

po-fetch-detail

Running

virendravaishnav committed on Sep 12, 2024

Commit

b5f436b

1 Parent(s): be080e8

Updated with OCR model and Gradio integration

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,19 +1,20 @@
 import gradio as gr
-from transformers import AutoProcessor, AutoModelForVision2Seq
-from PIL import Image
-# Load processor and model
-model_name = "OpenGVLab/InternVL2-1B"
-processor = AutoProcessor.from_pretrained(model_name, trust_remote_code=True)
-model = AutoModelForVision2Seq.from_pretrained(model_name, trust_remote_code=True)
-# Function to process the image
 def analyze_image(image):
-    # Prepare image for the model
-    inputs = processor(images=image, return_tensors="pt")
-    # Generate output
-    outputs = model.generate(**inputs)
-    return processor.decode(outputs[0])
 # Gradio interface for image input
 demo = gr.Interface(

 import gradio as gr
+from lmdeploy import pipeline, TurbomindEngineConfig
+from lmdeploy.vl import load_image
+# Model configuration
+model = 'OpenGVLab/InternVL2-1B'
+# Configure the pipeline for the model
+pipe = pipeline(model, backend_config=TurbomindEngineConfig(session_len=8192))
+# Function to process and describe the image
 def analyze_image(image):
+    # Convert PIL image to the format the model expects
+    img = load_image(image)  # `load_image` can handle both URLs and PIL images
+    # Run inference on the uploaded image
+    response = pipe(('describe this image', img))
+    return response.text
 # Gradio interface for image input
 demo = gr.Interface(