virendravaishnav committed on
Commit
be080e8
·
1 Parent(s): 234718c

Updated with OCR model and Gradio integration

Browse files
Files changed (1) hide show
  1. app.py +12 -13
app.py CHANGED
@@ -1,20 +1,19 @@
1
  import gradio as gr
2
- from lmdeploy import pipeline, TurbomindEngineConfig
3
- from lmdeploy.vl import load_image
4
 
5
- # Model configuration
6
- model = 'OpenGVLab/InternVL2-1B'
 
 
7
 
8
- # Configure the pipeline for the model
9
- pipe = pipeline(model, backend_config=TurbomindEngineConfig(session_len=8192))
10
-
11
- # Function to process and describe the image
12
  def analyze_image(image):
13
- # Convert PIL image to the format the model expects
14
- img = load_image(image) # `load_image` can handle both URLs and PIL images
15
- # Run inference on the uploaded image
16
- response = pipe(('describe this image', img))
17
- return response.text
18
 
19
  # Gradio interface for image input
20
  demo = gr.Interface(
 
1
  import gradio as gr
2
+ from transformers import AutoProcessor, AutoModelForVision2Seq
3
+ from PIL import Image
4
 
5
+ # Load processor and model
6
+ model_name = "OpenGVLab/InternVL2-1B"
7
+ processor = AutoProcessor.from_pretrained(model_name, trust_remote_code=True)
8
+ model = AutoModelForVision2Seq.from_pretrained(model_name, trust_remote_code=True)
9
 
10
+ # Function to process the image
 
 
 
11
  def analyze_image(image):
12
+ # Prepare image for the model
13
+ inputs = processor(images=image, return_tensors="pt")
14
+ # Generate output
15
+ outputs = model.generate(**inputs)
16
+ return processor.decode(outputs[0])
17
 
18
  # Gradio interface for image input
19
  demo = gr.Interface(