|
import streamlit as st |
|
from transformers import pipeline |
|
import spacy |
|
from io import StringIO |
|
import PyPDF2 |
|
import docx |
|
|
|
|
|
@st.cache_resource
def _load_ner_pipeline():
    """Build the Hugging Face NER pipeline a single time per server process.

    Streamlit re-executes this script from the top on every user
    interaction. Without caching, the large BERT checkpoint would be
    re-instantiated on each rerun; ``st.cache_resource`` keeps one shared
    instance alive instead.

    Returns:
        The callable token-classification pipeline.
    """
    return pipeline("ner", model="dbmdz/bert-large-cased-finetuned-conll03-english")


# Module-level handle kept under its original name so existing call sites
# (``nlp(text)``) continue to work unchanged.
nlp = _load_ner_pipeline()
|
|
|
|
|
# Human-readable regulation text keyed by topic identifier.
regulations = dict(
    pollution_limit="Air pollution should not exceed 100 µg/m³ of particulate matter.",
    waste_management="Waste should be sorted into recyclable and non-recyclable categories.",
)
|
|
|
|
|
def extract_text_from_pdf(file):
    """Extract the text of every page of a PDF, one page per line group.

    Args:
        file: A path or binary file-like object accepted by
            ``PyPDF2.PdfReader``.

    Returns:
        The text of all pages joined with a newline between consecutive
        pages, so the last word of one page is not fused with the first
        word of the next. Pages with no extractable text contribute an
        empty string.
    """
    pdf_reader = PyPDF2.PdfReader(file)
    # ``or ""`` is defensive: it keeps the join safe should a page yield
    # no text object at all (e.g. an image-only page).
    return "\n".join(page.extract_text() or "" for page in pdf_reader.pages)
|
|
|
|
|
def extract_text_from_docx(file):
    """Return the text of a Word document, one paragraph per line.

    Args:
        file: A path or file-like object passed to ``docx.Document``.

    Returns:
        The text of each paragraph in ``doc.paragraphs`` order, every
        paragraph followed by a newline (including the last one). An
        empty string when the document has no paragraphs.
    """
    paragraphs = docx.Document(file).paragraphs
    return "".join(f"{paragraph.text}\n" for paragraph in paragraphs)
|
|
|
|
|
def check_compliance(document_text):
    """Return reminder messages for regulated topics mentioned in the text.

    The check is a case-insensitive substring search: one reminder is
    produced when the text contains "pollution" and another when it
    contains "waste". No measurements are parsed from the document.

    Args:
        document_text: Full text of the report. ``None`` or an empty
            string yields no reminders.

    Returns:
        A list of reminder strings, pollution first and waste second;
        empty when neither keyword occurs.
    """
    if not document_text:
        return []

    # Lower-case once up front rather than once per keyword test.
    lowered = document_text.lower()
    keyword_reminders = (
        ("pollution", "Check pollution limits: Ensure PM2.5 does not exceed 100 µg/m³."),
        ("waste", "Check waste management: Ensure waste is properly sorted."),
    )
    return [reminder for keyword, reminder in keyword_reminders if keyword in lowered]
|
|
|
|
|
def _read_uploaded_text(uploaded_file):
    """Extract text from an uploaded report, reporting failures in the UI.

    Dispatches on the (lower-cased) file extension to the PDF, DOCX or
    plain-text reader.

    Args:
        uploaded_file: The object returned by ``st.file_uploader``; its
            ``name`` attribute and ``read()`` method are used.

    Returns:
        The extracted text (possibly empty), or ``None`` when the file
        could not be read and an error has already been shown.
    """
    file_extension = uploaded_file.name.split(".")[-1].lower()
    try:
        if file_extension == "pdf":
            return extract_text_from_pdf(uploaded_file)
        if file_extension == "docx":
            return extract_text_from_docx(uploaded_file)
        if file_extension == "txt":
            return uploaded_file.read().decode("utf-8")
    except Exception as exc:
        # UI boundary: the parsers raise a variety of exception types for
        # corrupt files, and a non-UTF-8 text file raises
        # UnicodeDecodeError. Show a message instead of a raw traceback.
        st.error(f"Could not read the uploaded file: {exc}")
        return None

    st.error("Unsupported file type!")
    return None


st.title("🌱 Environmental Compliance Checker")

uploaded_file = st.file_uploader("Upload Environmental Report", type=["txt", "pdf", "docx"])

if uploaded_file is None:
    st.write("Please upload a document to check compliance.")
else:
    file_content = _read_uploaded_text(uploaded_file)

    if file_content:
        st.text_area("Uploaded Document", file_content, height=300)

        st.subheader("Compliance Feedback")
        feedback = check_compliance(file_content)
        if feedback:
            for item in feedback:
                st.write(f"- {item}")
        else:
            st.write("No compliance issues found.")

        st.subheader("Regulation Mentions in Document")
        # One output line per entity dict returned by the NER pipeline.
        for entity in nlp(file_content):
            st.write(f"Entity: {entity['word']} - Label: {entity['entity']}")
    elif file_content is not None:
        # The file was read successfully but produced no text; say so
        # rather than leaving the page blank.
        st.warning("No text could be extracted from the uploaded file.")
|
|