virendravaishnav committed on
Commit
234718c
·
1 Parent(s): 4fc61d2

Updated with OCR model and Gradio integration

Browse files
Files changed (2) hide show
  1. app.py +20 -16
  2. requirements.txt +2 -1
app.py CHANGED
@@ -1,24 +1,28 @@
1
  import gradio as gr
2
- from transformers import AutoTokenizer, AutoModelForVision2Seq
 
3
 
4
- # Load the tokenizer and model, trusting remote code
5
- model_name = "OpenGVLab/InternVL2-1B"
6
- tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
7
- model = AutoModelForVision2Seq.from_pretrained(model_name, trust_remote_code=True)
8
 
9
- def analyze_image_text(image, text):
10
- # Tokenize the input
11
- inputs = tokenizer(text, return_tensors="pt")
12
- # Use the model to get outputs
13
- outputs = model.generate(**inputs)
14
- return tokenizer.decode(outputs[0])
15
 
16
- # Gradio interface
 
 
 
 
 
 
 
 
17
  demo = gr.Interface(
18
- fn=analyze_image_text,
19
- inputs=[gr.Image(type="pil"), gr.Textbox()],
20
- outputs="text",
21
- title="InternVL2-1B Image-Text Model"
 
22
  )
23
 
24
  if __name__ == "__main__":
 
1
  import gradio as gr
2
+ from lmdeploy import pipeline, TurbomindEngineConfig
3
+ from lmdeploy.vl import load_image
4
 
5
+ # Model configuration
6
+ model = 'OpenGVLab/InternVL2-1B'
 
 
7
 
8
+ # Configure the pipeline for the model
9
+ pipe = pipeline(model, backend_config=TurbomindEngineConfig(session_len=8192))
 
 
 
 
10
 
11
+ # Function to process and describe the image
12
+ def analyze_image(image):
13
+ # Convert PIL image to the format the model expects
14
+ img = load_image(image) # `load_image` can handle both URLs and PIL images
15
+ # Run inference on the uploaded image
16
+ response = pipe(('describe this image', img))
17
+ return response.text
18
+
19
+ # Gradio interface for image input
20
  demo = gr.Interface(
21
+ fn=analyze_image,
22
+ inputs=gr.Image(type="pil"), # Upload an image
23
+ outputs="text", # Output the generated image description
24
+ title="Image Description using OpenGVLab/InternVL2-1B",
25
+ description="Upload an image and get a description generated by the InternVL2-1B model."
26
  )
27
 
28
  if __name__ == "__main__":
requirements.txt CHANGED
@@ -4,4 +4,5 @@ torch
4
  gradio
5
  datasets
6
  pytesseract
7
- Pillow
 
 
4
  gradio
5
  datasets
6
  pytesseract
7
+ Pillow
8
+ lmdeploy