Spaces:
Build error
Build error
import requests | |
from bs4 import BeautifulSoup | |
import time | |
import streamlit as st | |
import threading | |
from datetime import datetime, timedelta | |
import smtplib | |
from email.message import EmailMessage | |
import os | |
from dotenv import load_dotenv | |
from google.oauth2.service_account import Credentials | |
from googleapiclient.discovery import build | |
# Pull settings from a local .env file into the process environment
# (the email credentials are read from there via os.getenv).
load_dotenv()

# Google Sheets target: the spreadsheet ID and the A1 range rows are appended to.
SHEET_ID = '1bZjlA-UJrBhWS2jHlEQ-7nbmDvxpEoKylgxHW51Hhzc'
RANGE = 'Sheet1!A:D'

# Pages that are polled for changes.
TRACKING_URLS = [
    "https://gdpr-info.eu/recitals/no-1/",
]

# Set this event to ask the background tracking thread to exit.
stop_event = threading.Event()
def authenticate_google_sheets():
    """Build a Google Sheets v4 API client from a service-account key file.

    The key is loaded from ``Credentials.json`` (a relative path) and is
    granted the ``spreadsheets`` scope.

    Returns:
        The service object produced by ``build('sheets', 'v4', ...)``.
    """
    sheets_scopes = ['https://www.googleapis.com/auth/spreadsheets']
    credentials = Credentials.from_service_account_file(
        'Credentials.json',
        scopes=sheets_scopes,
    )
    return build('sheets', 'v4', credentials=credentials)
def append_to_google_sheets(service, url, title, content, timestamp):
    """Append one tracking row to the configured spreadsheet.

    Args:
        service: Sheets API client as returned by ``authenticate_google_sheets``.
        url: Address of the tracked page.
        title: Page title.
        content: Page text; only the first 200 characters are stored.
        timestamp: Time of the check, already formatted as a string.

    Any exception raised while sending the row is reported through
    ``st.error`` instead of being propagated.
    """
    row = [url, title, content[:200], timestamp]
    try:
        values_api = service.spreadsheets().values()
        values_api.append(
            spreadsheetId=SHEET_ID,
            range=RANGE,
            valueInputOption="RAW",
            body={'values': [row]},
        ).execute()
        st.write(f"Data appended to Google Sheets at {timestamp}.")
    except Exception as exc:
        st.error(f"Error appending to Google Sheets: {exc}")
def send_email_notification(to_email, url, title, content, timestamp):
    """Email a short summary of a tracked page to ``to_email``.

    The sender address and password are read from the ``EMAIL_ADDRESS`` and
    ``EMAIL_PASSWORD`` environment variables; if either is missing an error
    is shown and nothing is sent.

    Args:
        to_email: Recipient address.
        url: Address of the tracked page (also used in the subject line).
        title: Page title.
        content: Page text; only the first 200 characters are included.
        timestamp: Time of the check, already formatted as a string.

    SMTP and connection failures are reported through ``st.error`` rather
    than raised.
    """
    sender_email = os.getenv("EMAIL_ADDRESS")
    sender_password = os.getenv("EMAIL_PASSWORD")
    smtp_server = "smtp.gmail.com"
    smtp_port = 587
    # Bound every blocking socket operation so a stalled mail server cannot
    # hang the caller (and with it the tracking thread) indefinitely.
    smtp_timeout = 30

    if not sender_email or not sender_password:
        st.error("Environment variables not loaded. Check your .env file.")
        return

    msg = EmailMessage()
    msg["Subject"] = f"Website Update Notification for {url}"
    msg["From"] = sender_email
    msg["To"] = to_email
    msg.set_content(
        "\n"
        f"Website: {url}\n"
        f"Title: {title}\n"
        f"Content (preview): {content[:200]}...\n"
        f"Tracked at: {timestamp}\n"
    )

    try:
        with smtplib.SMTP(smtp_server, smtp_port, timeout=smtp_timeout) as server:
            server.starttls()
            server.login(sender_email, sender_password)
            server.send_message(msg)
    except (smtplib.SMTPException, OSError) as e:
        # OSError covers DNS failures, refused connections and timeouts,
        # which are raised before any SMTP-level error can occur.
        st.error(f"SMTP Error: {e}")
    else:
        st.success(f"Notification email sent to {to_email}")
def fetch_website_data(url):
    """Download ``url`` and return its ``(title, content)`` as strings.

    ``title`` is the text of the ``<title>`` element, or
    ``'No title available'`` when the element is missing or empty.
    ``content`` is the text of all ``<p>`` elements joined by spaces, or
    ``'New Notification available'`` when the page has no paragraphs.

    On any ``requests`` failure (connection error, timeout, HTTP error
    status) the error is shown with ``st.error`` and a pair of placeholder
    strings is returned instead of raising.
    """
    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        st.error(f"Error fetching website data: {e}")
        return "Error occurred", "New notification detected. No content available due to an error."

    soup = BeautifulSoup(response.text, 'html.parser')

    # ``soup.title.string`` is None for an empty <title> or one containing
    # nested markup, so calling ``.strip()`` on it would raise
    # AttributeError. ``get_text`` always returns a string.
    title_text = soup.title.get_text(strip=True) if soup.title else ''
    title = title_text or 'No title available'

    paragraphs = soup.find_all('p')
    if paragraphs:
        content = ' '.join(p.text.strip() for p in paragraphs)
    else:
        content = 'New Notification available'
    return title, content
def track_websites(urls, recipient_email, interval=60, max_duration=20*60):
    """Poll ``urls`` until stopped, recording and emailing every change.

    Each cycle fetches every URL. When a page's ``(title, content)`` differs
    from the last value seen (or the page is seen for the first time), a row
    is appended to Google Sheets and a notification email is sent.

    Args:
        urls: Iterable of page addresses to poll.
        recipient_email: Address that receives the notifications.
        interval: Seconds to wait between polling cycles.
        max_duration: Maximum total running time in seconds.

    The loop ends when ``stop_event`` is set or ``max_duration`` has elapsed.
    """
    st.write(f"Started tracking for {recipient_email}")
    service = authenticate_google_sheets()
    last_updates = {}  # url -> (title, content) seen on the previous check
    end_time = datetime.now() + timedelta(seconds=max_duration)

    while not stop_event.is_set() and datetime.now() < end_time:
        for url in urls:
            # Honour a stop request between pages, not only between cycles.
            if stop_event.is_set():
                break

            # NOTE: on a failed fetch fetch_website_data returns placeholder
            # strings, so a failure is recorded like any other change.
            title, content = fetch_website_data(url)
            if not (title and content):
                continue

            if last_updates.get(url) == (title, content):
                continue
            last_updates[url] = (title, content)

            timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
            append_to_google_sheets(service, url, title, content, timestamp)
            try:
                send_email_notification(recipient_email, url, title, content, timestamp)
            except Exception as e:
                st.error(f"Error sending email notification: {e}")

        # Sleep until the next cycle, waking early on a stop request and
        # never sleeping past the overall deadline.
        remaining = (end_time - datetime.now()).total_seconds()
        stop_event.wait(max(0, min(interval, remaining)))

    # Report why tracking actually ended instead of always claiming the
    # default 20-minute limit was reached.
    if stop_event.is_set():
        st.write("Stopped tracking on request.")
    else:
        st.write(f"Stopped tracking after {max_duration / 60:g} minutes.")
def display_tracking_status():
    """Render the tracking UI and start or stop the background tracker.

    Shows an email input and, once an address is entered, a "Tracking"
    button that starts ``track_websites`` in a daemon thread and a
    "Stop Tracking" button that signals that thread to exit.

    The thread and the stop event it watches are both kept in
    ``st.session_state``. Streamlit re-executes the script on every
    interaction, which re-creates the module-level ``stop_event``; a running
    thread still watches the event that existed when it was started, so that
    exact object must be remembered in order to stop it later.
    """
    # Upper bound on how long the UI waits for the thread to finish after a
    # stop request; the thread may be in the middle of a network call.
    join_timeout = 15

    st.title("Update Tracking System with Notifications")
    email_input = st.text_input("Enter your email for notifications:")

    # Maintain thread state across reruns.
    if "tracking_thread" not in st.session_state:
        st.session_state["tracking_thread"] = None
    if "tracking_stop_event" not in st.session_state:
        st.session_state["tracking_stop_event"] = None

    if not email_input:
        return

    # Start tracking
    if st.button("Tracking"):
        current = st.session_state["tracking_thread"]
        if current is None or not current.is_alive():
            stop_event.clear()  # Clear the stop flag to allow tracking
            thread = threading.Thread(
                target=track_websites,
                args=(TRACKING_URLS, email_input),
                daemon=True,
            )
            thread.start()
            st.session_state["tracking_thread"] = thread
            # Remember the event this thread is actually watching.
            st.session_state["tracking_stop_event"] = stop_event
            st.success(f"Notifications will be sent to {email_input}.")
        else:
            st.warning("Tracking Updates is already running.")

    # Stop tracking
    if st.button("Stop Tracking"):
        current = st.session_state["tracking_thread"]
        if current is not None and current.is_alive():
            saved_event = st.session_state["tracking_stop_event"]
            if saved_event is None:
                saved_event = stop_event
            saved_event.set()  # Signal the thread to stop
            # Bounded wait so a slow network call cannot freeze the page.
            current.join(timeout=join_timeout)
            if current.is_alive():
                st.warning("Stop requested; tracking will end after the current check finishes.")
            else:
                st.session_state["tracking_thread"] = None
                st.session_state["tracking_stop_event"] = None
                st.success("Tracking stopped.")
        else:
            st.warning("No active tracking to stop.")
def main():
    """Application entry point: render the tracking UI."""
    display_tracking_status()


# Run the app only when this file is executed as a script.
if __name__ == "__main__":
    main()