Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,58 +1,32 @@
|
|
1 |
import streamlit as st
|
2 |
-
from paddleocr import PaddleOCR, draw_ocr
|
3 |
from PIL import Image
|
|
|
4 |
import numpy as np
|
5 |
-
from langdetect import detect
|
6 |
-
from transformers import pipeline
|
7 |
-
import torch
|
8 |
|
9 |
-
# Initialize
|
10 |
-
|
11 |
|
12 |
-
#
|
13 |
-
|
14 |
|
15 |
-
|
16 |
-
|
17 |
-
img_array = np.array(image)
|
18 |
-
# OCR processing
|
19 |
-
ocr_results = ocr.ocr(img_array, cls=True)
|
20 |
-
# Extracting text from OCR results
|
21 |
-
detected_text = " ".join([line[1][0] for line in ocr_results[0]])
|
22 |
-
# Language detection and summarization
|
23 |
-
language = detect(detected_text)
|
24 |
-
summary = summarizer(detected_text, max_length=50, min_length=25, do_sample=False)[0]['summary_text']
|
25 |
-
return detected_text, language, summary
|
26 |
-
|
27 |
-
def display_ocr_results(image, ocr_results):
|
28 |
-
boxes = [line[0] for line in ocr_results[0]]
|
29 |
-
texts = [line[1][0] for line in ocr_results[0]]
|
30 |
-
scores = [line[1][1] for line in ocr_results[0]]
|
31 |
-
font_path = "/path/to/font.ttf" # Replace with a valid path to a font supporting Urdu/Arabic
|
32 |
-
return draw_ocr(np.array(image), boxes, texts, scores, font_path=font_path)
|
33 |
-
|
34 |
-
# Streamlit Interface
|
35 |
-
st.title("Multilingual OCR and Text Summarization App")
|
36 |
-
st.write("Upload an image or capture one to get OCR results and text summary")
|
37 |
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
st.image(image, caption="Uploaded Image", use_container_width=True)
|
46 |
-
|
47 |
-
# Perform OCR and display results
|
48 |
-
detected_text, language, summary = recognize_text("uploaded_image.png")
|
49 |
-
st.write("### Detected Text")
|
50 |
-
st.write(detected_text)
|
51 |
-
st.write("### Detected Language")
|
52 |
-
st.write(language)
|
53 |
-
st.write("### Text Summary")
|
54 |
-
st.write(summary)
|
55 |
|
56 |
-
#
|
57 |
-
|
58 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import streamlit as st
|
|
|
2 |
from PIL import Image
|
3 |
+
import easyocr
|
4 |
import numpy as np
|
|
|
|
|
|
|
5 |
|
6 |
+
# Initialize EasyOCR Reader for specific languages
|
7 |
+
@st.cache_resource
def _load_reader():
    """Build the EasyOCR reader once and reuse it across Streamlit reruns.

    Streamlit re-executes the whole script on every interaction; without
    caching, the recognition models would be reloaded each time.
    """
    return easyocr.Reader(['en', 'ur'])  # Include 'ur' for Urdu


reader = _load_reader()
|
8 |
|
9 |
+
# Title
|
10 |
+
st.title("OCR App with EasyOCR")
|
11 |
|
12 |
+
# File uploader
|
13 |
+
uploaded_file = st.file_uploader("Upload an image", type=["jpg", "jpeg", "png"])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
14 |
|
15 |
+
if uploaded_file is not None:
|
16 |
+
# Open and display the image
|
17 |
+
image = Image.open(uploaded_file)
|
18 |
+
st.image(image, caption="Uploaded Image", use_column_width=True)
|
19 |
+
|
20 |
+
# Convert to numpy array for OCR
|
21 |
+
# Normalise to 3-channel RGB first: palette ("P"), 1-bit and CMYK uploads
# would otherwise become index/boolean/4-channel arrays that the OCR
# engine misinterprets.
img_array = np.array(image.convert("RGB"))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
22 |
|
23 |
+
# Perform OCR
|
24 |
+
with st.spinner("Processing..."):
|
25 |
+
results = reader.readtext(img_array)
|
26 |
+
|
27 |
+
# Display results
|
28 |
+
# Each result is a (bbox, text, prob) tuple; keep only the recognised text
# and join the fragments with single spaces (no trailing separator).
detected_text = " ".join(text for _bbox, text, _prob in results)
|
31 |
+
|
32 |
+
st.write("Detected Text:", detected_text)
|