pyresearch committed on
Commit
6af4017
·
1 Parent(s): edac17d

Upload 4 files

Browse files
Files changed (3) hide show
  1. Dockerfile +31 -0
  2. app.py +78 -0
  3. requirements.txt +8 -0
Dockerfile ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Use an official Python runtime as a parent image
FROM python:3.9.13

# Set the working directory to /appchecker
WORKDIR /appchecker

# Install system dependencies: the Tesseract OCR engine and its English data.
# The apt package lists are removed in the same layer to keep the image small.
RUN apt-get update && apt-get install -y --no-install-recommends \
        tesseract-ocr \
        tesseract-ocr-eng \
    && rm -rf /var/lib/apt/lists/*

# Upgrade pip
RUN pip install --no-cache-dir --upgrade pip

# Install Python dependencies before copying the sources so this layer stays
# cached until requirements.txt itself changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy the rest of the build context into the container at /appchecker
COPY . /appchecker

# NOTE: no PATH change is needed for Tesseract. The apt package installs the
# binary under /usr/bin, which is already on PATH; PATH entries must be
# directories, so the former "/usr/bin/tesseract" entry had no effect.

# Make port 8501 available to the world outside this container
EXPOSE 8501

# Define environment variable
ENV NAME=World

# Run app.py when the container launches, listening on the exposed port on
# all interfaces so the app is reachable from outside the container.
CMD ["streamlit", "run", "app.py", "--server.port=8501", "--server.address=0.0.0.0"]
app.py ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import cv2
3
+ import pytesseract
4
+ import requests
5
+ import numpy as np
6
+ import os
7
+
8
+
9
+
10
# Set the Tesseract command path.
# Use the bare command name so the binary is resolved through PATH wherever
# the system package manager installed it, instead of one hard-coded location.
# Set the TESSERACT_CMD environment variable to point at a specific binary.
pytesseract.pytesseract.tesseract_cmd = os.environ.get('TESSERACT_CMD', 'tesseract')
12
+
13
# Detecting characters using PyTesseract
def detectChar(img):
    """Draw per-character OCR boxes on ``img`` and show spell-check suggestions.

    Runs Tesseract over ``img``, renders a copy of the image with a rectangle
    and a label for every recognised character, then passes the recognised
    text to ``spell_check`` and writes each suggestion to the Streamlit page.

    Args:
        img: Image array whose first axis is the image height. The caller in
            this file passes an RGB array.

    Returns:
        None. Output goes to the Streamlit page (and the text to stdout).
    """
    # Only the height is needed (to flip the y axis below). Indexing instead
    # of a fixed 3-way unpack avoids failing on arrays that are not 3-D.
    hImg = img.shape[0]
    boxes = pytesseract.image_to_boxes(img)

    # Create an image for displaying boxes
    img_with_boxes = img.copy()

    for line in boxes.splitlines():
        fields = line.split(' ')
        if len(fields) < 5:
            # Not a "<char> <x> <y> <w> <h> ..." record; nothing to draw.
            continue
        char_text = fields[0]
        try:
            x, y, w, h = (int(value) for value in fields[1:5])
        except ValueError:
            continue

        # Draw boxes on the image. Box y values are subtracted from the image
        # height, i.e. they are treated as measured from the bottom edge.
        cv2.rectangle(img_with_boxes, (x, hImg - y), (w, hImg - h), (0, 255, 0), 2)  # Green rectangle for individual character
        cv2.putText(img_with_boxes, char_text, (x, hImg - y + 25), cv2.FONT_HERSHEY_COMPLEX, 1, (50, 50, 255), 2)

    # Display the image with character boxes using Streamlit
    st.image(img_with_boxes, caption="Image with Character Boxes", use_column_width=True)

    # Extract text for spell-checking. The per-character boxes carry no
    # whitespace, so joining them would glue every word together; ask
    # Tesseract for the plain text instead so word boundaries are preserved.
    detected_text = pytesseract.image_to_string(img).strip()
    print(detected_text)

    # Perform spell-check using LanguageTool API (skipped when nothing was read)
    suggestions = spell_check(detected_text) if detected_text else []

    # Display spell-check suggestions
    if suggestions:
        for suggestion in suggestions:
            st.write(suggestion)
    else:
        st.write("No suggestions or error in spell-checking.")
48
+
49
# Function to perform spell-check using LanguageTool API
def spell_check(text):
    """Return the LanguageTool messages reported for ``text``.

    Posts ``text`` to the public LanguageTool ``/v2/check`` endpoint as
    ``en-US`` and collects the ``message`` of every entry in ``matches``.

    Args:
        text: The text to check.

    Returns:
        A list of message strings. The list is empty when ``text`` is empty
        or whitespace, when the request fails or times out, when the server
        answers with an error status or a non-JSON body, or when the response
        has no ``matches``.
    """
    # Nothing to check; avoid a pointless network round trip.
    if not text or not text.strip():
        return []

    api_url = "https://api.languagetool.org/v2/check"
    data = {
        'text': text,
        'language': 'en-US',
    }

    try:
        # A timeout keeps the app from hanging if the service is unreachable.
        response = requests.post(api_url, data=data, timeout=10)
        response.raise_for_status()
        result = response.json()
    except (requests.RequestException, ValueError):
        # Network failure, HTTP error status, or a body that is not JSON.
        return []

    return [match['message'] for match in result.get('matches', [])]
65
+
66
# Streamlit UI
st.title("Pyresearch checker Detection and Spell-checking")
uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])

if uploaded_file is not None:
    # getvalue() returns the whole upload regardless of the stream position,
    # so a script rerun (e.g. after the button click) never reads zero bytes.
    image_data = np.frombuffer(uploaded_file.getvalue(), np.uint8)

    # imdecode yields None when the bytes are not a decodable image; an empty
    # buffer is rejected up front rather than handed to OpenCV.
    image = cv2.imdecode(image_data, cv2.IMREAD_COLOR) if image_data.size else None

    if image is None:
        st.error("Could not decode the uploaded file as an image.")
    else:
        # OpenCV decodes to BGR; convert to RGB for display and OCR.
        image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        st.image(image_rgb, caption="Uploaded Image.", use_column_width=True)

        if st.button("Detect Characters and Spell-check"):
            detectChar(image_rgb)
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
# Python dependencies for app.py.
# The Tesseract OCR engine itself is a system package (installed via apt in
# the Dockerfile), not a pip package, so the former duplicate "tesseract"
# entries are dropped; app.py only imports the pytesseract wrapper.
streamlit
pytesseract
requests
numpy
# Headless OpenCV build: same cv2 module without the GUI library dependencies,
# intended for server/container environments.
opencv-python-headless
8
+