Spaces:
Build error
Build error
| import streamlit as st | |
| from transformers import pipeline | |
| import spacy | |
| from io import StringIO | |
| import PyPDF2 | |
| import docx | |
# Load Hugging Face's pre-trained NER model.
# Streamlit re-executes this script from top to bottom on every user
# interaction, so the loader is wrapped in st.cache_resource: the pipeline is
# built once and the same object is reused on later reruns instead of being
# reconstructed each time.
@st.cache_resource
def _load_ner_pipeline():
    """Build and return the Hugging Face "ner" pipeline used by the app."""
    return pipeline("ner", model="dbmdz/bert-large-cased-finetuned-conll03-english")


nlp = _load_ner_pipeline()
# Sample regulations database, keyed by topic; each value is the rule text.
# Extend with more detailed regulations as needed.
regulations = dict(
    pollution_limit="Air pollution should not exceed 100 µg/m³ of particulate matter.",
    waste_management="Waste should be sorted into recyclable and non-recyclable categories.",
)
# Function to extract text from PDF
def extract_text_from_pdf(file):
    """Extract the text of every page of a PDF and return it as one string.

    Args:
        file: The PDF source, passed unchanged to ``PyPDF2.PdfReader``
            (the app passes the object returned by the Streamlit uploader).

    Returns:
        The text of all pages in document order, with a newline between
        consecutive pages. Pages that yield no text contribute an empty
        string, so the result is ``""`` when nothing could be extracted.
    """
    pdf_reader = PyPDF2.PdfReader(file)
    # Join with "\n" so the last word of one page does not fuse with the first
    # word of the next; `or ""` keeps a page with no extractable text from
    # breaking the join.
    return "\n".join(page.extract_text() or "" for page in pdf_reader.pages)
# Function to extract text from DOCX
def extract_text_from_docx(file):
    """Return the paragraph text of a DOCX document, one paragraph per line.

    Args:
        file: The DOCX source, passed unchanged to ``docx.Document``.

    Returns:
        The text of each entry in ``Document.paragraphs``, in order, each
        followed by a newline (so a non-empty result ends with ``"\\n"``).
    """
    paragraphs = docx.Document(file).paragraphs
    return "".join(f"{paragraph.text}\n" for paragraph in paragraphs)
# Function to check compliance with regulations
# (keyword, reminder) pairs: a reminder is reported when its keyword occurs
# anywhere in the document. Feedback is returned in this order.
_COMPLIANCE_RULES = (
    ("pollution", "Check pollution limits: Ensure PM2.5 does not exceed 100 µg/m³."),
    ("waste", "Check waste management: Ensure waste is properly sorted."),
)


def check_compliance(document_text):
    """Return compliance reminders triggered by keywords in the document.

    The match is a case-insensitive substring test, so a keyword also matches
    inside longer words (e.g. "wastewater" triggers the waste reminder).

    Args:
        document_text: The full document text. ``None`` or an empty string is
            treated as a document with no content.

    Returns:
        A list of reminder strings, one per matched keyword, in rule order.
        The list is empty when no keyword occurs.
    """
    if not document_text:
        return []
    # Lowercase once instead of once per rule; documents can be large.
    lowered = document_text.lower()
    return [message for keyword, message in _COMPLIANCE_RULES if keyword in lowered]
# Streamlit App
# Flow: upload a report -> extract its text by file extension -> show the
# text, the keyword-based compliance feedback, and the NER entities.
st.title("🌱 Environmental Compliance Checker")

# Upload document
uploaded_file = st.file_uploader("Upload Environmental Report", type=["txt", "pdf", "docx"])

if uploaded_file is not None:
    # Extract text based on file type (extension taken from the upload's name)
    file_extension = uploaded_file.name.split('.')[-1].lower()
    if file_extension == "pdf":
        file_content = extract_text_from_pdf(uploaded_file)
    elif file_extension == "docx":
        file_content = extract_text_from_docx(uploaded_file)
    elif file_extension == "txt":
        # errors="replace": a text file that is not valid UTF-8 is shown with
        # replacement characters instead of crashing the app with
        # UnicodeDecodeError.
        file_content = uploaded_file.read().decode("utf-8", errors="replace")
    else:
        st.error("Unsupported file type!")
        file_content = ""

    # Everything below is skipped when no text was obtained.
    if file_content:
        st.text_area("Uploaded Document", file_content, height=300)

        # Check compliance with regulations
        st.subheader("Compliance Feedback")
        feedback = check_compliance(file_content)
        if feedback:
            for item in feedback:
                st.write(f"- {item}")
        else:
            st.write("No compliance issues found.")

        # Optional: Provide NLP-based analysis or highlight regulations mentioned in the document
        st.subheader("Regulation Mentions in Document")
        # NOTE(review): the whole document is passed to the model in a single
        # call; long documents may exceed the model's input length - confirm
        # how the pipeline handles that (truncation vs. error).
        entities = nlp(file_content)
        for entity in entities:
            st.write(f"Entity: {entity['word']} - Label: {entity['entity']}")
else:
    st.write("Please upload a document to check compliance.")