import streamlit as st from transformers import pipeline import spacy from io import StringIO import PyPDF2 import docx # Load Hugging Face's pre-trained NER model nlp = pipeline("ner", model="dbmdz/bert-large-cased-finetuned-conll03-english") # Sample regulations database (can be expanded with more detailed regulations) regulations = { "pollution_limit": "Air pollution should not exceed 100 µg/m³ of particulate matter.", "waste_management": "Waste should be sorted into recyclable and non-recyclable categories.", } # Function to extract text from PDF def extract_text_from_pdf(file): pdf_reader = PyPDF2.PdfReader(file) text = "" for page in pdf_reader.pages: text += page.extract_text() return text # Function to extract text from DOCX def extract_text_from_docx(file): doc = docx.Document(file) text = "" for para in doc.paragraphs: text += para.text + "\n" return text # Function to check compliance with regulations def check_compliance(document_text): compliance_feedback = [] # Check for pollution limit violations if "pollution" in document_text.lower(): compliance_feedback.append("Check pollution limits: Ensure PM2.5 does not exceed 100 µg/m³.") # Check for waste management practices if "waste" in document_text.lower(): compliance_feedback.append("Check waste management: Ensure waste is properly sorted.") return compliance_feedback # Streamlit App st.title("🌱 Environmental Compliance Checker") # Upload document uploaded_file = st.file_uploader("Upload Environmental Report", type=["txt", "pdf", "docx"]) if uploaded_file is not None: # Extract text based on file type file_extension = uploaded_file.name.split('.')[-1].lower() if file_extension == "pdf": file_content = extract_text_from_pdf(uploaded_file) elif file_extension == "docx": file_content = extract_text_from_docx(uploaded_file) elif file_extension == "txt": file_content = uploaded_file.read().decode("utf-8") else: st.error("Unsupported file type!") file_content = "" if file_content: st.text_area("Uploaded Document", file_content, height=300) # Check compliance with regulations st.subheader("Compliance Feedback") feedback = check_compliance(file_content) if feedback: for item in feedback: st.write(f"- {item}") else: st.write("No compliance issues found.") # Optional: Provide NLP-based analysis or highlight regulations mentioned in the document st.subheader("Regulation Mentions in Document") entities = nlp(file_content) for entity in entities: st.write(f"Entity: {entity['word']} - Label: {entity['entity']}") else: st.write("Please upload a document to check compliance.")