Update app.py
Browse files
app.py
CHANGED
@@ -2,6 +2,8 @@ import streamlit as st
|
|
2 |
from transformers import pipeline
|
3 |
import spacy
|
4 |
from io import StringIO
|
|
|
|
|
5 |
|
6 |
# Load Hugging Face's pre-trained NER model
|
7 |
nlp = pipeline("ner", model="dbmdz/bert-large-cased-finetuned-conll03-english")
|
@@ -12,9 +14,24 @@ regulations = {
|
|
12 |
"waste_management": "Waste should be sorted into recyclable and non-recyclable categories.",
|
13 |
}
|
14 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
# Function to check compliance with regulations
|
16 |
def check_compliance(document_text):
|
17 |
-
entities = nlp(document_text)
|
18 |
compliance_feedback = []
|
19 |
|
20 |
# Check for pollution limit violations
|
@@ -34,26 +51,38 @@ st.title("🌱 Environmental Compliance Checker")
|
|
34 |
uploaded_file = st.file_uploader("Upload Environmental Report", type=["txt", "pdf", "docx"])
|
35 |
|
36 |
if uploaded_file is not None:
|
37 |
-
#
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
if feedback:
|
46 |
-
for item in feedback:
|
47 |
-
st.write(f"- {item}")
|
48 |
else:
|
49 |
-
st.
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
56 |
|
57 |
else:
|
58 |
st.write("Please upload a document to check compliance.")
|
59 |
|
|
|
|
2 |
from transformers import pipeline
|
3 |
import spacy
|
4 |
from io import StringIO
|
5 |
+
import PyPDF2
|
6 |
+
import docx
|
7 |
|
8 |
# Load Hugging Face's pre-trained NER model
|
9 |
nlp = pipeline("ner", model="dbmdz/bert-large-cased-finetuned-conll03-english")
|
|
|
14 |
"waste_management": "Waste should be sorted into recyclable and non-recyclable categories.",
|
15 |
}
|
16 |
|
17 |
+
# Function to extract text from PDF
|
18 |
+
def extract_text_from_pdf(file):
    """Return the text of every page of a PDF, concatenated in page order.

    ``file`` is passed straight to ``PyPDF2.PdfReader``. The per-page text
    is joined with no separator between pages.
    """
    reader = PyPDF2.PdfReader(file)
    page_texts = [page.extract_text() for page in reader.pages]
    return "".join(page_texts)
|
24 |
+
|
25 |
+
# Function to extract text from DOCX
|
26 |
+
def extract_text_from_docx(file):
    """Return the paragraph text of a DOCX file, one paragraph per line.

    ``file`` is passed straight to ``docx.Document``. Each paragraph's text
    is followed by a newline, so a non-empty result ends with ``"\\n"``.
    """
    document = docx.Document(file)
    return "".join(f"{paragraph.text}\n" for paragraph in document.paragraphs)
|
32 |
+
|
33 |
# Function to check compliance with regulations
|
34 |
def check_compliance(document_text):
|
|
|
35 |
compliance_feedback = []
|
36 |
|
37 |
# Check for pollution limit violations
|
|
|
51 |
uploaded_file = st.file_uploader("Upload Environmental Report", type=["txt", "pdf", "docx"])
|
52 |
|
53 |
if uploaded_file is not None:
    # Choose the extractor from the lower-cased file extension.
    file_extension = uploaded_file.name.split('.')[-1].lower()
    if file_extension == "pdf":
        file_content = extract_text_from_pdf(uploaded_file)
    elif file_extension == "docx":
        file_content = extract_text_from_docx(uploaded_file)
    elif file_extension == "txt":
        try:
            file_content = uploaded_file.read().decode("utf-8")
        except UnicodeDecodeError:
            # A text file in another encoding would otherwise abort the app
            # with a traceback; report it like the unsupported-type case.
            st.error("Could not read the file: it is not valid UTF-8 text.")
            file_content = ""
    else:
        st.error("Unsupported file type!")
        file_content = ""

    # Everything below needs extracted text; skip it when extraction
    # produced nothing (unsupported type, undecodable or empty document).
    if file_content:
        st.text_area("Uploaded Document", file_content, height=300)

        # Check compliance with regulations
        st.subheader("Compliance Feedback")
        feedback = check_compliance(file_content)

        if feedback:
            for item in feedback:
                st.write(f"- {item}")
        else:
            st.write("No compliance issues found.")

        # Optional: list the entities the NER pipeline finds in the document
        st.subheader("Regulation Mentions in Document")
        entities = nlp(file_content)
        for entity in entities:
            st.write(f"Entity: {entity['word']} - Label: {entity['entity']}")

else:
    st.write("Please upload a document to check compliance.")
|
87 |
|
88 |
+
|