virendravaishnav committed on
Commit
b5f436b
·
1 Parent(s): be080e8

Updated with OCR model and Gradio integration

Browse files
Files changed (1) hide show
  1. app.py +13 -12
app.py CHANGED
@@ -1,19 +1,20 @@
1
  import gradio as gr
2
- from transformers import AutoProcessor, AutoModelForVision2Seq
3
- from PIL import Image
4
 
5
- # Load processor and model
6
- model_name = "OpenGVLab/InternVL2-1B"
7
- processor = AutoProcessor.from_pretrained(model_name, trust_remote_code=True)
8
- model = AutoModelForVision2Seq.from_pretrained(model_name, trust_remote_code=True)
9
 
10
- # Function to process the image
 
 
 
11
  def analyze_image(image):
12
- # Prepare image for the model
13
- inputs = processor(images=image, return_tensors="pt")
14
- # Generate output
15
- outputs = model.generate(**inputs)
16
- return processor.decode(outputs[0])
17
 
18
  # Gradio interface for image input
19
  demo = gr.Interface(
 
1
  import gradio as gr
2
+ from lmdeploy import pipeline, TurbomindEngineConfig
3
+ from lmdeploy.vl import load_image
4
 
5
+ # Model configuration
6
+ model = 'OpenGVLab/InternVL2-1B'
 
 
7
 
8
+ # Configure the pipeline for the model
9
+ pipe = pipeline(model, backend_config=TurbomindEngineConfig(session_len=8192))
10
+
11
+ # Function to process and describe the image
12
  def analyze_image(image):
13
+ # Convert PIL image to the format the model expects
14
+ img = load_image(image) # `load_image` can handle both URLs and PIL images
15
+ # Run inference on the uploaded image
16
+ response = pipe(('describe this image', img))
17
+ return response.text
18
 
19
  # Gradio interface for image input
20
  demo = gr.Interface(