Spaces:

aminahmed78
/

text-from-image

Sleeping

App Files Files Community

aminahmed78 committed on Nov 6, 2024

Commit

57b8db4

verified ·

1 Parent(s): fc691a3

Update app.py

Browse files

Files changed (1) hide show

app.py +50 -34

app.py CHANGED Viewed

@@ -1,46 +1,62 @@
 import streamlit as st
 from paddleocr import PaddleOCR, draw_ocr
-from PIL import Image, ImageFont
 import numpy as np
 from langdetect import detect
-import os
-# Set up the OCR engine with PaddleOCR's Arabic ('ar') model, used here for Urdu text
-ocr = PaddleOCR(lang='ar')  # Use 'ar' for Arabic-based scripts like Urdu
-# Page title and image upload widget (only file upload is wired up here)
-st.title("OCR Application for Urdu Text")
-uploaded_file = st.file_uploader("Upload an image", type=["png", "jpg", "jpeg"])
-if uploaded_file is not None:
-    image = Image.open(uploaded_file)
-    st.image(image, caption='Uploaded Image', use_container_width=True)
-    # Run OCR on the uploaded image (passed as a NumPy array)
-    st.write("Processing...")
-    result = ocr.ocr(np.array(image), cls=True)
-    # Split result[0] into parallel lists: res[0] -> box, res[1][0] -> text, res[1][1] -> score
-    boxes = [res[0] for res in result[0]]
-    texts = [res[1][0] for res in result[0]]
-    scores = [res[1][1] for res in result[0]]
-    # Join the recognized fragments with single spaces and display them
-    detected_text = " ".join(texts)
-    st.write("Detected Text")
-    st.write(detected_text)
-    # Detect the language of the joined text with langdetect
-    detected_lang = detect(detected_text)
-    st.write("Detected Language:", detected_lang)
-    # Font file handed to draw_ocr for rendering the recognized text
-    font_path = "/content/drive/MyDrive/Colab Notebooks/NOORIN59.TTF"  # Update with an Urdu-compatible font if possible
-    if not os.path.exists(font_path):
-        st.write("Font file not found. Using default.")  # NOTE(review): only a message is shown; the same font_path is still passed to draw_ocr below
-    # Overlay boxes, texts and scores on the image and display the result
-    st.write("OCR Visualization")
-    visualized_image = draw_ocr(np.array(image), boxes, texts, scores, font_path=font_path)
-    st.image(visualized_image, caption='OCR Result Visualization', use_container_width=True)

 import streamlit as st
 from paddleocr import PaddleOCR, draw_ocr
+from PIL import Image
 import numpy as np
 from langdetect import detect
+from transformers import pipeline
+# Initialize PaddleOCR for multilingual text recognition
+ocr = PaddleOCR(use_angle_cls=True, lang='en')  # For language options, use 'ch' for Chinese, etc.
+# Load summarization model
+summarizer = pipeline("summarization")
+def recognize_text(image_path):
+    image = Image.open(image_path)
+    img_array = np.array(image)
+    # OCR processing
+    ocr_results = ocr.ocr(img_array, cls=True)
+    # Extracting text from OCR results
+    detected_text = " ".join([line[1][0] for line in ocr_results[0]])
+    # Language detection and summarization
+    language = detect(detected_text)
+    summary = summarizer(detected_text, max_length=50, min_length=25, do_sample=False)[0]['summary_text']
+    return detected_text, language, summary
+def display_ocr_results(image, ocr_results):
+    boxes = [line[0] for line in ocr_results[0]]
+    texts = [line[1][0] for line in ocr_results[0]]
+    scores = [line[1][1] for line in ocr_results[0]]
+    return draw_ocr(np.array(image), boxes, texts, scores, font_path='path_to_font.ttf')
+# Streamlit Interface
+st.title("Multilingual OCR and Text Summarization App")
+st.write("Upload an image or capture one to get OCR results and text summary")
+# Image Upload or Capture
+image_file = st.file_uploader("Choose an image file", type=["jpg", "jpeg", "png"])
+if image_file is not None:
+    with open("uploaded_image.png", "wb") as f:
+        f.write(image_file.getbuffer())
+    st.success("Image uploaded successfully!")
+    image = Image.open("uploaded_image.png")
+    st.image(image, caption="Uploaded Image", use_column_width=True)
+    # Perform OCR and display results
+    detected_text, language, summary = recognize_text("uploaded_image.png")
+    st.write("### Detected Text")
+    st.write(detected_text)
+    st.write("### Detected Language")
+    st.write(language)
+    st.write("### Text Summary")
+    st.write(summary)
+    # Display OCR visualization
+    visualized_image = display_ocr_results(image, ocr.ocr(np.array(image), cls=True))
+    st.image(visualized_image, caption="OCR Results Visualization", use_column_width=True)