Spaces:

pyresearch
/

checker

Sleeping

App Files Files Community

pyresearch committed on Jan 8, 2024

Commit

6af4017

1 Parent(s): edac17d

Upload 4 files

Browse files

Files changed (3) hide show

Dockerfile +31 -0
app.py +78 -0
requirements.txt +8 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,31 @@

+# Use an official Python runtime as a parent image
+FROM python:3.9.13
+# Set the working directory to /appchecker
+WORKDIR /appchecker
+# Copy the current directory contents into the container at /appchecker
+COPY . /appchecker
+# Upgrade pip
+RUN pip install --upgrade pip
+# Install the Tesseract OCR engine and its English language data, then
+# remove the apt package lists so they are not baked into the image layer
+RUN apt-get update && apt-get install -y \
+    tesseract-ocr \
+    tesseract-ocr-eng \
+    && rm -rf /var/lib/apt/lists/*
+# Install Python dependencies
+RUN pip install --no-cache-dir -r requirements.txt
+# No PATH change is needed for Tesseract: apt installs the binary as
+# /usr/bin/tesseract and /usr/bin is already on PATH. (PATH entries must be
+# directories, so adding the binary's own path would have no effect.)
+# Document that the app listens on port 8501 (publishing is done at run time)
+EXPOSE 8501
+# Define environment variable
+ENV NAME=World
+# Run app.py when the container launches
+CMD ["streamlit", "run", "app.py"]

app.py ADDED Viewed

	@@ -0,0 +1,78 @@

+import streamlit as st
+import cv2
+import pytesseract
+import requests
+import numpy as np
+import os
+# Set the Tesseract command path
+pytesseract.pytesseract.tesseract_cmd = '/usr/local/bin/tesseract'
+# Detecting characters using PyTesseract
+def detectChar(img):
+    hImg, wImg, _ = img.shape
+    boxes = pytesseract.image_to_boxes(img)
+    # Create an image for displaying boxes
+    img_with_boxes = img.copy()
+    for b in boxes.splitlines():
+        b = b.split(' ')
+        x, y, w, h = int(b[1]), int(b[2]), int(b[3]), int(b[4])
+        char_text = b[0]
+        # Draw boxes on the image
+        cv2.rectangle(img_with_boxes, (x, hImg - y), (w, hImg - h), (0, 255, 0), 2)  # Green rectangle for individual character
+        cv2.putText(img_with_boxes, char_text, (x, hImg - y + 25), cv2.FONT_HERSHEY_COMPLEX, 1, (50, 50, 255), 2)
+    # Display the image with character boxes using Streamlit
+    st.image(img_with_boxes, caption="Image with Character Boxes", use_column_width=True)
+    # Extract text for spell-checking
+    detected_text = ''.join([b.split()[0] for b in boxes.splitlines()])
+    print(detected_text)
+    # Perform spell-check using LanguageTool API
+    suggestions = spell_check(detected_text)
+    # Display spell-check suggestions
+    if suggestions:
+     #   st.write("Spell Check only")
+        for suggestion in suggestions:
+            st.write(suggestion)
+         #   print(suggestion)
+    else:
+        st.write("No suggestions or error in spell-checking.")
+# Function to perform spell-check using LanguageTool API
+def spell_check(text):
+    api_url = "https://api.languagetool.org/v2/check"
+    data = {
+        'text': text,
+        'language': 'en-US',
+    }
+    response = requests.post(api_url, data=data)
+    result = response.json()
+    if 'matches' in result:
+        suggestions = [match['message'] for match in result['matches']]
+        return suggestions
+    else:
+        return []
+# Streamlit UI
+st.title("Pyresearch checker Detection  and Spell-checking")
+uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])
+if uploaded_file is not None:
+    image_data = np.frombuffer(uploaded_file.read(), np.uint8)
+    image = cv2.imdecode(image_data, cv2.IMREAD_COLOR)
+    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+    st.image(image_rgb, caption="Uploaded Image.", use_column_width=True)
+    if st.button("Detect Characters and Spell-check"):
+        detectChar(image_rgb)

requirements.txt ADDED Viewed

	@@ -0,0 +1,8 @@

+streamlit
+pytesseract
+# NOTE(review): app.py imports only pytesseract, and the Tesseract binary is
+# installed with apt in the Dockerfile; confirm this package is really needed.
+tesseract
+requests
+numpy
+opencv-python