Spaces:
Sleeping
Sleeping
Commit
·
6af4017
1
Parent(s):
edac17d
Upload 4 files
Browse files- Dockerfile +31 -0
- app.py +78 -0
- requirements.txt +8 -0
Dockerfile
ADDED
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Use an official Python runtime as a parent image
FROM python:3.9.13

# Set the working directory to /appchecker
WORKDIR /appchecker

# Copy the current directory contents into the container at /appchecker
COPY . /appchecker

# Upgrade pip
RUN pip install --upgrade pip

# Install system dependencies (the Tesseract OCR engine and its English data).
# The apt package lists are removed in the same layer so they do not stay in the image.
RUN apt-get update && apt-get install -y \
    tesseract-ocr \
    tesseract-ocr-eng \
    && rm -rf /var/lib/apt/lists/*

# Install Python dependencies
RUN pip install -r requirements.txt

# Make sure the directory holding the tesseract binary is on PATH.
# PATH entries must be directories, not the binary itself; appending keeps the
# existing lookup order (and therefore the image's own python/pip) unchanged.
ENV PATH="${PATH}:/usr/bin"

# Make port 8501 available to the world outside this container
EXPOSE 8501

# Define environment variable
ENV NAME=World

# Run app.py when the container launches
CMD ["streamlit", "run", "app.py"]
|
app.py
ADDED
@@ -0,0 +1,78 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import cv2
|
3 |
+
import pytesseract
|
4 |
+
import requests
|
5 |
+
import numpy as np
|
6 |
+
import os
|
7 |
+
|
8 |
+
|
9 |
+
|
10 |
+
# Set the Tesseract command path.
# The binary's location depends on how Tesseract was installed (the Dockerfile in
# this commit installs it with apt, which places it in /usr/bin), so use the first
# candidate that exists and otherwise let pytesseract resolve "tesseract" via PATH.
pytesseract.pytesseract.tesseract_cmd = next(
    (
        candidate
        for candidate in ('/usr/local/bin/tesseract', '/usr/bin/tesseract')
        if os.path.isfile(candidate)
    ),
    'tesseract',
)
|
12 |
+
|
13 |
+
# Detecting characters using PyTesseract
|
14 |
+
def detectChar(img):
    """Run OCR on an image, draw per-character boxes, and spell-check the text.

    The annotated image and the spell-check messages are rendered with
    Streamlit; nothing is returned.

    Args:
        img: Image array whose first axis is the height. The caller in this
            file passes the RGB image decoded from the upload.
    """
    hImg = img.shape[0]
    boxes = pytesseract.image_to_boxes(img)

    # Draw on a copy so the caller's image is left untouched.
    img_with_boxes = img.copy()

    for line in boxes.splitlines():
        fields = line.split(' ')
        if len(fields) < 5:
            # Not a "<char> <x1> <y1> <x2> <y2> ..." line; nothing to draw.
            continue
        char_text = fields[0]
        x, y, w, h = (int(value) for value in fields[1:5])

        # The y values are flipped against the image height because the box
        # coordinates are measured from the bottom edge, while OpenCV draws
        # from the top edge.
        cv2.rectangle(img_with_boxes, (x, hImg - y), (w, hImg - h), (0, 255, 0), 2)  # Green rectangle for individual character
        cv2.putText(img_with_boxes, char_text, (x, hImg - y + 25), cv2.FONT_HERSHEY_COMPLEX, 1, (50, 50, 255), 2)

    # Display the image with character boxes using Streamlit
    st.image(img_with_boxes, caption="Image with Character Boxes", use_column_width=True)

    # Extract text for spell-checking. Joining the per-character box labels
    # would drop every space and line break, leaving one run-together token
    # that a spell checker cannot split into words, so read the text with its
    # whitespace instead.
    detected_text = pytesseract.image_to_string(img).strip()
    print(detected_text)

    # Perform spell-check using LanguageTool API (skipped when no text was found).
    suggestions = spell_check(detected_text) if detected_text else []

    # Display spell-check suggestions
    if suggestions:
        for suggestion in suggestions:
            st.write(suggestion)
    else:
        st.write("No suggestions or error in spell-checking.")
|
48 |
+
|
49 |
+
# Function to perform spell-check using LanguageTool API
|
50 |
+
def spell_check(text):
    """Return the LanguageTool messages for problems found in ``text``.

    Args:
        text: Text to check; it is submitted with language ``en-US``.

    Returns:
        A list with the ``message`` of every entry in the response's
        ``matches``. The list is empty when ``text`` is blank, when the
        request fails or returns an error status, or when the response is not
        the expected JSON object.
    """
    # Nothing to check: avoid a pointless network round trip.
    if not text or not text.strip():
        return []

    api_url = "https://api.languagetool.org/v2/check"
    data = {
        'text': text,
        'language': 'en-US',
    }

    try:
        # A timeout keeps the app from hanging if the service is unreachable.
        response = requests.post(api_url, data=data, timeout=10)
        response.raise_for_status()
        result = response.json()
    except (requests.RequestException, ValueError):
        # Network failure, HTTP error status, or a body that is not JSON.
        return []

    if not isinstance(result, dict):
        return []

    return [
        match['message']
        for match in result.get('matches', [])
        if 'message' in match
    ]
|
65 |
+
|
66 |
+
# Streamlit UI
|
67 |
+
st.title("Pyresearch checker Detection and Spell-checking")
uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])

if uploaded_file is not None:
    # Decode the uploaded bytes into an OpenCV (BGR) image.
    image_data = np.frombuffer(uploaded_file.read(), np.uint8)
    # imdecode returns None when the bytes are not a readable image and
    # rejects an empty buffer, so both cases are treated as "undecodable".
    image = cv2.imdecode(image_data, cv2.IMREAD_COLOR) if image_data.size else None

    if image is None:
        st.error("Could not read the uploaded file as an image.")
    else:
        # Streamlit and the OCR step work on RGB channel order.
        image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        st.image(image_rgb, caption="Uploaded Image.", use_column_width=True)

        if st.button("Detect Characters and Spell-check"):
            detectChar(image_rgb)
|
requirements.txt
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
streamlit
pytesseract
# NOTE(review): app.py imports only pytesseract, and the OCR engine itself is
# installed with apt in the Dockerfile; confirm this PyPI package is needed.
# It was listed twice; the duplicate entry has been removed.
tesseract
requests
numpy
opencv-python
|
8 |
+
|