EngrNarmeen's picture
Update app.py
266efde verified
raw
history blame
2.92 kB
import streamlit as st
from transformers import pipeline
import spacy
from io import StringIO
import PyPDF2
import docx
# Load Hugging Face's pre-trained NER model.
# Streamlit re-executes this script from the top on every user interaction,
# so the pipeline is built inside a cached loader: the model is constructed
# once and the same object is handed back on later reruns.
@st.cache_resource
def _load_ner_pipeline():
    """Build the NER pipeline used for entity extraction in the app."""
    return pipeline("ner", model="dbmdz/bert-large-cased-finetuned-conll03-english")


nlp = _load_ner_pipeline()
# Sample regulations database, keyed by a short topic identifier.
# Extend it by adding further ``topic="regulation text"`` entries.
regulations = dict(
    pollution_limit="Air pollution should not exceed 100 µg/m³ of particulate matter.",
    waste_management="Waste should be sorted into recyclable and non-recyclable categories.",
)
# Function to extract text from PDF
def extract_text_from_pdf(file):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        file: The object handed to ``PyPDF2.PdfReader`` (the app passes the
            uploaded file).

    Returns:
        The extracted page texts joined with no separator between pages.
    """
    reader = PyPDF2.PdfReader(file)
    page_texts = [page.extract_text() for page in reader.pages]
    return "".join(page_texts)
# Function to extract text from DOCX
def extract_text_from_docx(file):
    """Return the paragraph text of a DOCX document, one paragraph per line.

    Args:
        file: The object handed to ``docx.Document`` (the app passes the
            uploaded file).

    Returns:
        Every paragraph's text followed by a newline, in document order.
        Only ``doc.paragraphs`` is read.
    """
    document = docx.Document(file)
    return "".join(f"{paragraph.text}\n" for paragraph in document.paragraphs)
# Function to check compliance with regulations
def check_compliance(document_text):
    """Return a reminder for each regulated topic the document mentions.

    Matching is a case-insensitive substring test, so "Wastewater" counts
    as a mention of "waste".

    Args:
        document_text: Full text of the report to scan.

    Returns:
        A list of reminder strings, pollution first and waste second; empty
        when neither keyword occurs.
    """
    lowered = document_text.lower()
    # (keyword, reminder) pairs, listed in the order reminders are reported.
    keyword_reminders = (
        ("pollution", "Check pollution limits: Ensure PM2.5 does not exceed 100 µg/m³."),
        ("waste", "Check waste management: Ensure waste is properly sorted."),
    )
    return [reminder for keyword, reminder in keyword_reminders if keyword in lowered]
# Streamlit App
def _iter_text_chunks(text, max_chars=400):
    """Yield consecutive slices of *text*, each at most *max_chars* long.

    A cut is placed just after the last space or newline inside the window
    when there is one, so a word is only split if it is longer than the
    window. Whitespace-only slices are skipped.
    """
    position = 0
    total = len(text)
    while position < total:
        stop = min(position + max_chars, total)
        if stop < total:
            boundary = max(
                text.rfind(" ", position, stop),
                text.rfind("\n", position, stop),
            )
            # Only move the cut when it still leaves a non-empty slice.
            if boundary > position:
                stop = boundary + 1
        piece = text[position:stop]
        if piece.strip():
            yield piece
        position = stop


st.title("🌱 Environmental Compliance Checker")

# Upload document
uploaded_file = st.file_uploader("Upload Environmental Report", type=["txt", "pdf", "docx"])

if uploaded_file is not None:
    # Extract text based on file type (taken from the part after the last dot)
    file_extension = uploaded_file.name.split('.')[-1].lower()
    if file_extension == "pdf":
        file_content = extract_text_from_pdf(uploaded_file)
    elif file_extension == "docx":
        file_content = extract_text_from_docx(uploaded_file)
    elif file_extension == "txt":
        # errors="replace" keeps a report saved in another encoding readable
        # instead of aborting the run with UnicodeDecodeError.
        file_content = uploaded_file.read().decode("utf-8", errors="replace")
    else:
        st.error("Unsupported file type!")
        file_content = ""

    if file_content:
        st.text_area("Uploaded Document", file_content, height=300)

        # Check compliance with regulations
        st.subheader("Compliance Feedback")
        feedback = check_compliance(file_content)
        if feedback:
            for item in feedback:
                st.write(f"- {item}")
        else:
            st.write("No compliance issues found.")

        # Optional: Provide NLP-based analysis or highlight regulations mentioned in the document
        st.subheader("Regulation Mentions in Document")
        # The document is fed to the model in small pieces rather than in one
        # call. NOTE(review): BERT-style checkpoints are normally limited to
        # 512 input tokens, so a whole report would be truncated or rejected;
        # confirm the limit for this model and tune max_chars if needed.
        for chunk in _iter_text_chunks(file_content):
            for entity in nlp(chunk):
                st.write(f"Entity: {entity['word']} - Label: {entity['entity']}")
    elif file_extension in ("pdf", "docx", "txt"):
        # A supported file that yielded no text (for example an image-only
        # PDF) would otherwise leave the page blank with no explanation.
        st.warning("No text could be extracted from the uploaded file.")
else:
    st.write("Please upload a document to check compliance.")