legal_document_summarization / Update_tracking.py
sohampawar1030's picture
Upload 13 files
6a020f1 verified
import requests
from bs4 import BeautifulSoup
import time
import streamlit as st
import threading
from datetime import datetime, timedelta
import smtplib
from email.message import EmailMessage
import os
from dotenv import load_dotenv
from google.oauth2.service_account import Credentials
from googleapiclient.discovery import build
# Load environment variables from .env file
load_dotenv()
# Google Sheets Configuration
SHEET_ID = '1bZjlA-UJrBhWS2jHlEQ-7nbmDvxpEoKylgxHW51Hhzc' # Google Sheets ID
RANGE = 'Sheet1!A:D' # The range where you want to append the data
# Predefined list of URLs to track
TRACKING_URLS = [
"https://gdpr-info.eu/recitals/no-1/"]
# Event to signal thread termination
stop_event = threading.Event()
# Authenticate Google Sheets API
def authenticate_google_sheets():
creds = Credentials.from_service_account_file(
'Credentials.json',
scopes=['https://www.googleapis.com/auth/spreadsheets']
)
service = build('sheets', 'v4', credentials=creds)
return service
# Append data to Google Sheets
def append_to_google_sheets(service, url, title, content, timestamp):
values = [
[url, title, content[:200], timestamp] # Prepare row to append
]
body = {'values': values}
try:
service.spreadsheets().values().append(
spreadsheetId=SHEET_ID,
range=RANGE,
valueInputOption="RAW",
body=body
).execute()
st.write(f"Data appended to Google Sheets at {timestamp}.")
except Exception as e:
st.error(f"Error appending to Google Sheets: {e}")
# Send email notification
def send_email_notification(to_email, url, title, content, timestamp):
sender_email = os.getenv("EMAIL_ADDRESS")
sender_password = os.getenv("EMAIL_PASSWORD")
smtp_server = "smtp.gmail.com"
smtp_port = 587
if not sender_email or not sender_password:
st.error("Environment variables not loaded. Check your .env file.")
return
msg = EmailMessage()
msg["Subject"] = f"Website Update Notification for {url}"
msg["From"] = sender_email
msg["To"] = to_email
msg.set_content(f"""
Website: {url}
Title: {title}
Content (preview): {content[:200]}...
Tracked at: {timestamp}
""")
try:
with smtplib.SMTP(smtp_server, smtp_port) as server:
server.starttls()
server.login(sender_email, sender_password)
server.send_message(msg)
st.success(f"Notification email sent to {to_email}")
except smtplib.SMTPException as e:
st.error(f"SMTP Error: {e}")
# Fetch website data
def fetch_website_data(url):
try:
response = requests.get(url, timeout=10)
response.raise_for_status()
soup = BeautifulSoup(response.text, 'html.parser')
title = soup.title.string.strip() if soup.title else 'No title available'
paragraphs = soup.find_all('p')
content = ' '.join([p.text.strip() for p in paragraphs]) if paragraphs else 'New Notification available'
return title, content
except requests.exceptions.RequestException as e:
st.error(f"Error fetching website data: {e}")
return "Error occurred", "New notification detected. No content available due to an error."
# Track websites and store updates in Google Sheets
def track_websites(urls, recipient_email, interval=60, max_duration=20*60):
st.write(f"Started tracking for {recipient_email}")
service = authenticate_google_sheets()
last_updates = {} # To track changes in website content
start_time = datetime.now() # Record the start time
end_time = start_time + timedelta(seconds=max_duration) # Set end time (20 minutes later)
while not stop_event.is_set() and datetime.now() < end_time:
for url in urls:
title, content = fetch_website_data(url)
if title and content:
timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
# Check for updates
if url not in last_updates or last_updates[url] != (title, content):
last_updates[url] = (title, content)
# Append to Google Sheets
append_to_google_sheets(service, url, title, content, timestamp)
# Send notification email
try:
send_email_notification(recipient_email, url, title, content, timestamp)
except Exception as e:
st.error(f"Error sending email notification: {e}")
# Wait for the next interval or until stop_event is set
stop_event.wait(interval)
st.write("Stopped tracking after 20 minutes.")
# Display tracking status
def display_tracking_status():
st.title("Update Tracking System with Notifications")
email_input = st.text_input("Enter your email for notifications:")
# Maintain thread state
if "tracking_thread" not in st.session_state:
st.session_state["tracking_thread"] = None
if email_input:
# Start tracking
if st.button("Tracking"):
if st.session_state["tracking_thread"] is None or not st.session_state["tracking_thread"].is_alive():
stop_event.clear() # Clear the stop flag to allow tracking
thread = threading.Thread(target=track_websites, args=(TRACKING_URLS, email_input), daemon=True)
thread.start()
st.session_state["tracking_thread"] = thread
st.success(f"Notifications will be sent to {email_input}.")
else:
st.warning("Tracking Updates is already running.")
# Stop tracking
if st.button("Stop Tracking"):
if st.session_state["tracking_thread"] is not None and st.session_state["tracking_thread"].is_alive():
stop_event.set() # Signal the thread to stop
st.session_state["tracking_thread"].join() # Wait for the thread to finish
st.session_state["tracking_thread"] = None
st.success("Tracking stopped.")
else:
st.warning("No active tracking to stop.")
# Main function
def main():
display_tracking_status()
if __name__ == "__main__":
main()