Update app.py
Browse files
app.py
CHANGED
@@ -1,31 +1,30 @@
|
|
1 |
import fitz # PyMuPDF for PDF processing
|
2 |
from PIL import Image
|
3 |
-
from transformers import pipeline,
|
4 |
import streamlit as st
|
5 |
import os
|
6 |
import re
|
7 |
from docx import Document
|
8 |
from langdetect import detect
|
9 |
|
10 |
-
# Load
|
11 |
-
processor =
|
12 |
-
model =
|
13 |
|
14 |
# Load translation model
|
15 |
translator = pipeline("translation", model="Helsinki-NLP/opus-mt-mul-en")
|
16 |
|
17 |
|
18 |
def extract_text_from_image(image):
|
19 |
-
"""Extract text from image using
|
20 |
-
# Convert image to RGB
|
21 |
image = image.convert("RGB")
|
|
|
|
|
|
|
|
|
|
|
22 |
|
23 |
-
# Use Qwen processor and model
|
24 |
-
pixel_values = processor(image, return_tensors="pt").pixel_values
|
25 |
-
result = model.generate(pixel_values)
|
26 |
-
decoded_text = processor.batch_decode(result, skip_special_tokens=True)[0]
|
27 |
-
|
28 |
-
# Ensure extracted text is clean
|
29 |
return decoded_text.strip()
|
30 |
|
31 |
|
|
|
1 |
import fitz # PyMuPDF for PDF processing
|
2 |
from PIL import Image
|
3 |
+
from transformers import pipeline, Blip2Processor, Blip2ForConditionalGeneration
|
4 |
import streamlit as st
|
5 |
import os
|
6 |
import re
|
7 |
from docx import Document
|
8 |
from langdetect import detect
|
9 |
|
10 |
+
# Load BLIP-2 model and processor for image-to-text
|
11 |
+
processor = Blip2Processor.from_pretrained("Salesforce/blip2-opt-2.7b")
|
12 |
+
model = Blip2ForConditionalGeneration.from_pretrained("Salesforce/blip2-opt-2.7b")
|
13 |
|
14 |
# Load translation model
|
15 |
translator = pipeline("translation", model="Helsinki-NLP/opus-mt-mul-en")
|
16 |
|
17 |
|
18 |
def extract_text_from_image(image):
|
19 |
+
"""Extract text from image using BLIP-2."""
|
20 |
+
# Convert the image to RGB and preprocess
|
21 |
image = image.convert("RGB")
|
22 |
+
inputs = processor(images=image, return_tensors="pt")
|
23 |
+
|
24 |
+
# Generate text from the image
|
25 |
+
generated_ids = model.generate(**inputs)
|
26 |
+
decoded_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
|
27 |
|
|
|
|
|
|
|
|
|
|
|
|
|
28 |
return decoded_text.strip()
|
29 |
|
30 |
|