Spaces:

EngrNarmeen
/

Environmental_Compliance_Checker

Build error

App Files Files Community

Environmental_Compliance_Checker / app.py

EngrNarmeen

Update app.py

3e698e3 verified about 1 year ago

raw

history blame

2.92 kB

	import streamlit as st
	from transformers import pipeline
	import spacy
	from io import StringIO
	import PyPDF2
	import docx

	# Load Hugging Face's pre-trained NER model.
	# Streamlit re-executes this script from top to bottom on each user
	# interaction, so the model is built through a cached loader instead of a
	# bare module-level call; the heavy pipeline is then constructed only once
	# and reused on later reruns.
	@st.cache_resource
	def _load_ner_pipeline():
	    """Build and return the token-classification (NER) pipeline."""
	    return pipeline("ner", model="dbmdz/bert-large-cased-finetuned-conll03-english")


	nlp = _load_ner_pipeline()

	# Sample regulation texts keyed by topic (can be expanded with more detailed
	# regulations). NOTE(review): nothing in the visible code reads this mapping;
	# check_compliance carries its own hard-coded messages.
	regulations = dict(
	    pollution_limit="Air pollution should not exceed 100 µg/m³ of particulate matter.",
	    waste_management="Waste should be sorted into recyclable and non-recyclable categories.",
	)

	# Function to extract text from PDF
	def extract_text_from_pdf(file):
	    """Return the text of all pages of a PDF, separated by newlines.

	    Args:
	        file: Whatever ``PyPDF2.PdfReader`` accepts; the app passes the
	            uploaded file object from Streamlit.

	    Returns:
	        str: The text of each page that produced any text, joined with
	        ``"\\n"``. Empty string when no page yields text.
	    """
	    pdf_reader = PyPDF2.PdfReader(file)
	    # `or ""` tolerates a page whose extraction yields nothing (for example
	    # a page without a text layer) instead of failing on concatenation.
	    page_texts = (page.extract_text() or "" for page in pdf_reader.pages)
	    # Join with a newline so the last word of one page is not fused with the
	    # first word of the next; skip empty pages so a text-less PDF still
	    # returns "" and is treated as "no content" by the caller.
	    return "\n".join(text for text in page_texts if text)

	# Function to extract text from DOCX
	def extract_text_from_docx(file):
	    """Return the paragraph text of a DOCX document.

	    Args:
	        file: Passed straight to ``docx.Document``.

	    Returns:
	        str: Every paragraph's text in document order, each followed by a
	        newline character.
	    """
	    document = docx.Document(file)
	    return "".join(paragraph.text + "\n" for paragraph in document.paragraphs)

	# Function to check compliance with regulations
	def check_compliance(document_text):
	    """Produce reminder messages for regulated topics mentioned in a text.

	    The check is a case-insensitive substring search: a topic counts as
	    mentioned when its keyword occurs anywhere in the text.

	    Args:
	        document_text: The document contents as a string.

	    Returns:
	        list[str]: One message per matched topic, pollution first and waste
	        second; an empty list when neither keyword occurs.
	    """
	    lowered = document_text.lower()
	    # (keyword, message) pairs, listed in the order messages are reported.
	    keyword_advice = (
	        ("pollution", "Check pollution limits: Ensure PM2.5 does not exceed 100 µg/m³."),
	        ("waste", "Check waste management: Ensure waste is properly sorted."),
	    )
	    return [advice for keyword, advice in keyword_advice if keyword in lowered]

	# Streamlit App
	st.title("🌱 Environmental Compliance Checker")

	# Upload document
	uploaded_file = st.file_uploader("Upload Environmental Report", type=["txt", "pdf", "docx"])

	if uploaded_file is not None:
	    # Pick the extractor from the file name's extension (case-insensitive).
	    file_extension = uploaded_file.name.split('.')[-1].lower()
	    file_content = ""
	    extraction_ok = True
	    try:
	        if file_extension == "pdf":
	            file_content = extract_text_from_pdf(uploaded_file)
	        elif file_extension == "docx":
	            file_content = extract_text_from_docx(uploaded_file)
	        elif file_extension == "txt":
	            # errors="replace" keeps a text file that is not valid UTF-8
	            # from aborting the run; undecodable bytes become U+FFFD.
	            file_content = uploaded_file.read().decode("utf-8", errors="replace")
	        else:
	            st.error("Unsupported file type!")
	            extraction_ok = False
	    except Exception as exc:
	        # UI boundary: a corrupt or unreadable upload should produce a
	        # readable message rather than a raw traceback in the page.
	        st.error(f"Could not read the uploaded file: {exc}")
	        extraction_ok = False

	    if file_content:
	        st.text_area("Uploaded Document", file_content, height=300)

	        # Check compliance with regulations
	        st.subheader("Compliance Feedback")
	        feedback = check_compliance(file_content)

	        if feedback:
	            for item in feedback:
	                st.write(f"- {item}")
	        else:
	            st.write("No compliance issues found.")

	        # Optional: Provide NLP-based analysis or highlight regulations mentioned in the document
	        st.subheader("Regulation Mentions in Document")
	        entities = nlp(file_content)
	        if entities:
	            for entity in entities:
	                st.write(f"Entity: {entity['word']} - Label: {entity['entity']}")
	        else:
	            st.write("No named entities detected.")
	    elif extraction_ok:
	        # The file was parsed but produced no text (e.g. an empty file or a
	        # document without extractable text); say so instead of showing nothing.
	        st.warning("No readable text could be extracted from the uploaded file.")

	else:
	    st.write("Please upload a document to check compliance.")