tahirsher committed on
Commit
3eaf646
·
verified ·
1 Parent(s): e9de34a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -12
app.py CHANGED
@@ -1,31 +1,30 @@
1
  import fitz # PyMuPDF for PDF processing
2
  from PIL import Image
3
- from transformers import pipeline, AutoProcessor, AutoModelForImageTextToText
4
  import streamlit as st
5
  import os
6
  import re
7
  from docx import Document
8
  from langdetect import detect
9
 
10
- # Load Qwen model for image-to-text
11
- processor = AutoProcessor.from_pretrained("Qwen/Qwen2.5-VL-3B-Instruct")
12
- model = AutoModelForImageTextToText.from_pretrained("Qwen/Qwen2.5-VL-3B-Instruct")
13
 
14
  # Load translation model
15
  translator = pipeline("translation", model="Helsinki-NLP/opus-mt-mul-en")
16
 
17
 
18
  def extract_text_from_image(image):
19
- """Extract text from image using Qwen and TrOCR fallback."""
20
- # Convert image to RGB format for processing
21
  image = image.convert("RGB")
 
 
 
 
 
22
 
23
- # Use Qwen processor and model
24
- pixel_values = processor(image, return_tensors="pt").pixel_values
25
- result = model.generate(pixel_values)
26
- decoded_text = processor.batch_decode(result, skip_special_tokens=True)[0]
27
-
28
- # Ensure extracted text is clean
29
  return decoded_text.strip()
30
 
31
 
 
1
  import fitz # PyMuPDF for PDF processing
2
  from PIL import Image
3
+ from transformers import pipeline, Blip2Processor, Blip2ForConditionalGeneration
4
  import streamlit as st
5
  import os
6
  import re
7
  from docx import Document
8
  from langdetect import detect
9
 
10
+ # Load BLIP-2 model and processor for image-to-text
11
+ processor = Blip2Processor.from_pretrained("Salesforce/blip2-opt-2.7b")
12
+ model = Blip2ForConditionalGeneration.from_pretrained("Salesforce/blip2-opt-2.7b")
13
 
14
  # Load translation model
15
  translator = pipeline("translation", model="Helsinki-NLP/opus-mt-mul-en")
16
 
17
 
18
  def extract_text_from_image(image):
19
+ """Extract text from image using BLIP-2."""
20
+ # Convert the image to RGB and preprocess
21
  image = image.convert("RGB")
22
+ inputs = processor(images=image, return_tensors="pt")
23
+
24
+ # Generate text from the image
25
+ generated_ids = model.generate(**inputs)
26
+ decoded_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
27
 
 
 
 
 
 
 
28
  return decoded_text.strip()
29
 
30