virendravaishnav committed on
Commit
ee363c7
·
1 Parent(s): 53edb88

Updated with OCR model and Gradio integration

Browse files
Files changed (1) hide show
  1. app.py +16 -12
app.py CHANGED
@@ -1,20 +1,24 @@
1
  import gradio as gr
2
- from transformers import pipeline
3
 
4
- # Load a Hugging Face OCR model for printed text
5
- ocr_model = pipeline('image-to-text', model='microsoft/trocr-base-printed')
 
 
6
 
7
- def analyze_image(image):
8
- result = ocr_model(image)
9
- return result[0]['generated_text'] if result else "No text could be extracted."
 
 
 
10
 
11
- # Gradio interface for image input
12
  demo = gr.Interface(
13
- fn=analyze_image,
14
- inputs=gr.Image(type="pil"), # Upload an image
15
- outputs="text", # Output the extracted text
16
- title="Invoice Text Extraction",
17
- description="Upload an image of an invoice to extract text."
18
  )
19
 
20
  if __name__ == "__main__":
 
1
  import gradio as gr
2
+ from transformers import AutoTokenizer, AutoModelForVision2Seq
3
 
4
+ # Load the tokenizer and model
5
+ model_name = "OpenGVLab/InternVL2-1B"
6
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
7
+ model = AutoModelForVision2Seq.from_pretrained(model_name)
8
 
9
+ def analyze_image_text(image, text):
10
+ # Tokenize the input
11
+ inputs = tokenizer(text, return_tensors="pt")
12
+ # Use the model to get outputs
13
+ outputs = model.generate(**inputs)
14
+ return tokenizer.decode(outputs[0])
15
 
16
+ # Gradio interface
17
  demo = gr.Interface(
18
+ fn=analyze_image_text,
19
+ inputs=[gr.Image(type="pil"), gr.Textbox()],
20
+ outputs="text",
21
+ title="InternVL2-1B Image-Text Model"
 
22
  )
23
 
24
  if __name__ == "__main__":