aminahmed78 committed on
Commit
69a22be
·
verified ·
1 Parent(s): ec4c658

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -50
app.py CHANGED
@@ -1,58 +1,32 @@
1
  import streamlit as st
2
- from paddleocr import PaddleOCR, draw_ocr
3
  from PIL import Image
 
4
  import numpy as np
5
- from langdetect import detect
6
- from transformers import pipeline
7
- import torch
8
 
9
- # Initialize PaddleOCR for multilingual text recognition
10
- ocr = PaddleOCR(use_angle_cls=True, lang='ar') # Using 'ar' to support Arabic scripts like Urdu
11
 
12
- # Load summarization model
13
- summarizer = pipeline("summarization")
14
 
15
- def recognize_text(image_path):
16
- image = Image.open(image_path)
17
- img_array = np.array(image)
18
- # OCR processing
19
- ocr_results = ocr.ocr(img_array, cls=True)
20
- # Extracting text from OCR results
21
- detected_text = " ".join([line[1][0] for line in ocr_results[0]])
22
- # Language detection and summarization
23
- language = detect(detected_text)
24
- summary = summarizer(detected_text, max_length=50, min_length=25, do_sample=False)[0]['summary_text']
25
- return detected_text, language, summary
26
-
27
- def display_ocr_results(image, ocr_results):
28
- boxes = [line[0] for line in ocr_results[0]]
29
- texts = [line[1][0] for line in ocr_results[0]]
30
- scores = [line[1][1] for line in ocr_results[0]]
31
- font_path = "/path/to/font.ttf" # Replace with a valid path to a font supporting Urdu/Arabic
32
- return draw_ocr(np.array(image), boxes, texts, scores, font_path=font_path)
33
-
34
- # Streamlit Interface
35
- st.title("Multilingual OCR and Text Summarization App")
36
- st.write("Upload an image or capture one to get OCR results and text summary")
37
 
38
- # Image Upload or Capture
39
- image_file = st.file_uploader("Choose an image file", type=["jpg", "jpeg", "png"])
40
- if image_file is not None:
41
- with open("uploaded_image.png", "wb") as f:
42
- f.write(image_file.getbuffer())
43
- st.success("Image uploaded successfully!")
44
- image = Image.open("uploaded_image.png")
45
- st.image(image, caption="Uploaded Image", use_container_width=True)
46
-
47
- # Perform OCR and display results
48
- detected_text, language, summary = recognize_text("uploaded_image.png")
49
- st.write("### Detected Text")
50
- st.write(detected_text)
51
- st.write("### Detected Language")
52
- st.write(language)
53
- st.write("### Text Summary")
54
- st.write(summary)
55
 
56
- # Display OCR visualization
57
- visualized_image = display_ocr_results(image, ocr.ocr(np.array(image), cls=True))
58
- st.image(visualized_image, caption="OCR Results Visualization", use_container_width=True)
 
 
 
 
 
 
 
 
"""Streamlit app that runs EasyOCR (English + Urdu) on an uploaded image."""

import streamlit as st
from PIL import Image
import easyocr
import numpy as np


@st.cache_resource
def load_reader():
    """Return the shared EasyOCR reader, building it only once.

    Streamlit re-executes this script from the top on every widget
    interaction; caching the reader avoids re-creating it on each rerun.
    """
    return easyocr.Reader(['en', 'ur'])  # Include 'ur' for Urdu


# Kept as a module-level name so existing references to `reader` still work.
reader = load_reader()

# Title
st.title("OCR App with EasyOCR")

# File uploader
uploaded_file = st.file_uploader("Upload an image", type=["jpg", "jpeg", "png"])

if uploaded_file is not None:
    # Open the upload and normalise it to 3-channel RGB. Without this,
    # palette ("P"), bilevel ("1") or CMYK images become arrays of palette
    # indices / booleans / ink values when converted with np.array().
    try:
        image = Image.open(uploaded_file).convert("RGB")
    except OSError:
        # Pillow raises OSError (incl. UnidentifiedImageError) for files it
        # cannot decode; report it instead of showing a traceback.
        st.error("The uploaded file could not be read as an image.")
        st.stop()

    st.image(image, caption="Uploaded Image", use_column_width=True)

    # Convert to numpy array for OCR
    img_array = np.array(image)

    # Perform OCR
    with st.spinner("Processing..."):
        results = reader.readtext(img_array)

    # Each result is unpacked as (bounding box, text, confidence);
    # only the text is displayed.
    detected_text = " ".join(text for _bbox, text, _prob in results)

    # Display results
    if detected_text:
        st.write("Detected Text:", detected_text)
    else:
        st.warning("No text was detected in the image.")