sohampawar1030 committed on
Commit
ac07e52
·
verified ·
1 Parent(s): 3e0d7cc

Rename Update_tracking.py to Update_tracker.py

Browse files
Files changed (2) hide show
  1. Update_tracker.py +122 -0
  2. Update_tracking.py +0 -169
Update_tracker.py ADDED
@@ -0,0 +1,122 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import requests
3
+ from bs4 import BeautifulSoup
4
+ import smtplib
5
+ from email.mime.text import MIMEText
6
+ from email.mime.multipart import MIMEMultipart
7
+ import gspread
8
+ from oauth2client.service_account import ServiceAccountCredentials
9
+ from dotenv import load_dotenv
10
+ import os
11
+
12
+ # Load environment variables from .env file
13
+ load_dotenv()
14
+
15
+ # Function to fetch live recitals from the GDPR website
16
def fetch_gdpr_recitals(limit=3, timeout=10):
    """Fetch GDPR recitals from the gdpr-info.eu recitals index.

    The index page is downloaded and each ``div.artikel`` entry is followed
    to its own page, whose ``div.entry-content`` text becomes the recital
    content.

    Args:
        limit: Maximum number of index entries to process (default 3, the
            demo limit). ``None`` processes every entry.
        timeout: Seconds to wait on each HTTP request before giving up.

    Returns:
        A dict mapping the recital number (str) to
        ``{'title': str, 'content': str}``. Empty when the index page cannot
        be fetched. Entries that fail individually are reported with
        ``st.error`` and skipped.
    """
    from urllib.parse import urljoin

    url = "https://gdpr-info.eu/recitals/"
    try:
        response = requests.get(url, timeout=timeout)
    except requests.exceptions.RequestException:
        # Network failures are reported the same way as a bad status code.
        st.error("Failed to fetch data from the GDPR website.")
        return {}

    if response.status_code != 200:
        st.error("Failed to fetch data from the GDPR website.")
        return {}

    soup = BeautifulSoup(response.content, 'html.parser')
    recitals = {}
    articles = soup.find_all('div', class_='artikel')

    for article in articles[:limit]:
        anchor = article.find('a')
        number_tag = article.find('span', class_='nummer')
        title_tag = article.find('span', class_='titel')

        # Skip entries that lack the expected markup instead of crashing
        # on a None result from find().
        if (
            anchor is None
            or not anchor.get('href')
            or number_tag is None
            or title_tag is None
        ):
            continue

        # urljoin leaves absolute links untouched and resolves relative ones.
        link = urljoin(url, anchor['href'])
        number = number_tag.text.strip('()')
        title = title_tag.text.strip()

        try:
            rec_response = requests.get(link, timeout=timeout)
        except requests.exceptions.RequestException:
            st.error(f"Failed to fetch recital {number} from {link}")
            continue

        if rec_response.status_code != 200:
            st.error(f"Failed to fetch recital {number} from {link}")
            continue

        rec_soup = BeautifulSoup(rec_response.content, 'html.parser')
        entry = rec_soup.find('div', class_='entry-content')
        if entry is None:
            # Page loaded but does not contain the expected content block.
            st.error(f"Failed to fetch recital {number} from {link}")
            continue

        recitals[number] = {
            'title': title,
            'content': entry.get_text(strip=True),
        }

    return recitals
44
+
45
+ # Function to send email notifications
46
def send_email(recitals):
    """Email the fetched recitals through Gmail's SMTP server.

    Sender, receiver and password are read from the ``EMAIL_ADDRESS``,
    ``RECEIVER_EMAIL`` and ``EMAIL_PASSWORD`` environment variables. The
    outcome is reported in the Streamlit UI; nothing is returned or raised.

    Args:
        recitals: Mapping of recital number to a dict with ``'title'`` and
            ``'content'`` keys.
    """
    sender_email = os.getenv("EMAIL_ADDRESS")
    receiver_email = os.getenv("RECEIVER_EMAIL")
    password = os.getenv("EMAIL_PASSWORD")

    # Fail early with a clear message instead of an opaque error from
    # smtplib when the configuration is incomplete.
    if not (sender_email and receiver_email and password):
        st.error(
            "Email settings are incomplete. Set EMAIL_ADDRESS, "
            "RECEIVER_EMAIL and EMAIL_PASSWORD."
        )
        return

    subject = "GDPR Recitals Update"
    body = "New GDPR recitals have been fetched:\n\n" + "".join(
        f"Recital {number}: {details['title']}\n{details['content']}\n\n"
        for number, details in recitals.items()
    )

    msg = MIMEMultipart()
    msg['From'] = sender_email
    msg['To'] = receiver_email
    msg['Subject'] = subject
    msg.attach(MIMEText(body, 'plain'))

    try:
        # The timeout keeps the page from hanging on an unreachable server.
        with smtplib.SMTP('smtp.gmail.com', 587, timeout=30) as server:
            server.starttls()  # Secure the connection
            server.login(sender_email, password)
            server.send_message(msg)
        st.success("Email notification sent!")
    except smtplib.SMTPAuthenticationError:
        st.error("Failed to login: Check your email and password.")
    except smtplib.SMTPConnectError:
        st.error("Failed to connect to the SMTP server. Check your network connection.")
    except smtplib.SMTPException as e:
        st.error(f"SMTP error occurred: {str(e)}")
    except Exception as e:
        # UI boundary: report anything else (e.g. DNS failure, timeout)
        # rather than crashing the page.
        st.error(f"Failed to send email: {str(e)}")
77
+
78
+ # Function to store data in Google Sheets
79
def store_in_google_sheets(recitals):
    """Append the fetched recitals to the first worksheet of a Google Sheet.

    The service-account key file is taken from ``GOOGLE_CREDENTIALS_PATH``
    and the spreadsheet from ``GOOGLE_SHEET_ID``. Each recital becomes one
    row: ``[number, title, content]``. The outcome is reported in the
    Streamlit UI; nothing is returned or raised.

    Args:
        recitals: Mapping of recital number to a dict with ``'title'`` and
            ``'content'`` keys.
    """
    scope = ["https://spreadsheets.google.com/feeds", "https://www.googleapis.com/auth/drive"]

    # Validate all configuration before touching the network. The
    # credentials path is deliberately not echoed to the page.
    creds_path = os.getenv("GOOGLE_CREDENTIALS_PATH")
    if not creds_path or not os.path.exists(creds_path):
        st.error("Google credentials path is invalid or not set.")
        return

    sheet_id = os.getenv("GOOGLE_SHEET_ID")
    if not sheet_id:
        st.error("Google Sheet ID is not set.")
        return

    rows = [
        [number, details['title'], details['content']]
        for number, details in recitals.items()
    ]
    if not rows:
        st.warning("No recitals to store.")
        return

    try:
        creds = ServiceAccountCredentials.from_json_keyfile_name(creds_path, scope)
        client = gspread.authorize(creds)
        sheet = client.open_by_key(sheet_id).sheet1
        # One batched request instead of one API call per recital.
        sheet.append_rows(rows)
    except Exception as e:
        # UI boundary: auth, permission and quota errors are shown to the
        # user instead of aborting the page.
        st.error(f"Failed to store data in Google Sheets: {e}")
        return

    st.success("Data stored in Google Sheets!")
102
+
103
+ # Function to display the Update Tracker page
104
def display_Update_tracker_page():
    """Render the "Update Tracker" Streamlit page.

    Shows the page title and a "Fetch Live Recitals" button. When the
    button is pressed the recitals are fetched with fetch_gdpr_recitals().
    If any come back, each is rendered and the whole set is handed to
    send_email() and store_in_google_sheets(); otherwise a "No recitals
    found." note is written.
    """
    st.title("Update Tracker - GDPR Recitals")

    # Nothing more to render until the user requests a fetch.
    if not st.button("Fetch Live Recitals"):
        return

    with st.spinner("Fetching updates..."):
        fetched = fetch_gdpr_recitals()
        if not fetched:
            st.write("No recitals found.")
            return

        for rec_number in fetched:
            rec = fetched[rec_number]
            st.markdown(f"*Recital {rec_number}: {rec['title']}*")
            st.write(rec['content'])

        send_email(fetched)
        store_in_google_sheets(fetched)
119
+
120
+ # Run the display function
121
+ if __name__ == "__main__":
122
+ display_Update_tracker_page()
Update_tracking.py DELETED
@@ -1,169 +0,0 @@
1
- import requests
2
- from bs4 import BeautifulSoup
3
- import time
4
- import streamlit as st
5
- import threading
6
- from datetime import datetime, timedelta
7
- import smtplib
8
- from email.message import EmailMessage
9
- import os
10
- from dotenv import load_dotenv
11
- from google.oauth2.service_account import Credentials
12
- from googleapiclient.discovery import build
13
-
14
- # Load environment variables from .env file
15
- load_dotenv()
16
-
17
- # Google Sheets Configuration
18
- SHEET_ID = '1bZjlA-UJrBhWS2jHlEQ-7nbmDvxpEoKylgxHW51Hhzc' # Google Sheets ID
19
- RANGE = 'Sheet1!A:D' # The range where you want to append the data
20
-
21
- # Predefined list of URLs to track
22
- TRACKING_URLS = [
23
- "https://gdpr-info.eu/recitals/no-1/"]
24
-
25
- # Event to signal thread termination
26
- stop_event = threading.Event()
27
-
28
- # Authenticate Google Sheets API
29
def authenticate_google_sheets():
    """Build a Google Sheets v4 API client.

    Credentials are loaded from the service-account file 'Credentials.json'
    with the spreadsheets scope.

    Returns:
        The service object produced by ``build('sheets', 'v4', ...)``.
    """
    sheets_scopes = ['https://www.googleapis.com/auth/spreadsheets']
    account_creds = Credentials.from_service_account_file(
        'Credentials.json',
        scopes=sheets_scopes,
    )
    return build('sheets', 'v4', credentials=account_creds)
36
-
37
- # Append data to Google Sheets
38
def append_to_google_sheets(service, url, title, content, timestamp):
    """Append one tracking record to the configured Google Sheet.

    The row written is ``[url, title, first 200 characters of content,
    timestamp]``, appended to ``RANGE`` of spreadsheet ``SHEET_ID`` with the
    RAW input option. Success and failure are both reported through
    Streamlit; exceptions from the API call are not propagated.

    Args:
        service: Sheets API service object used to issue the append request.
        url: Tracked page URL.
        title: Page title.
        content: Page text; only a 200-character preview is stored.
        timestamp: Time string recorded with the row.
    """
    row = [url, title, content[:200], timestamp]
    payload = {'values': [row]}
    try:
        values_api = service.spreadsheets().values()
        append_request = values_api.append(
            spreadsheetId=SHEET_ID,
            range=RANGE,
            valueInputOption="RAW",
            body=payload,
        )
        append_request.execute()
        st.write(f"Data appended to Google Sheets at {timestamp}.")
    except Exception as e:
        st.error(f"Error appending to Google Sheets: {e}")
53
-
54
- # Send email notification
55
def send_email_notification(to_email, url, title, content, timestamp):
    """Email a website-update notification through Gmail's SMTP server.

    Sender address and password come from the ``EMAIL_ADDRESS`` and
    ``EMAIL_PASSWORD`` environment variables. The outcome is reported
    through Streamlit; SMTP and connection errors are not propagated.

    Args:
        to_email: Recipient address.
        url: Tracked page URL (also used in the subject line).
        title: Page title.
        content: Page text; only a 200-character preview is sent.
        timestamp: Time string at which the page was tracked.
    """
    sender_email = os.getenv("EMAIL_ADDRESS")
    sender_password = os.getenv("EMAIL_PASSWORD")
    smtp_server = "smtp.gmail.com"
    smtp_port = 587

    if not sender_email or not sender_password:
        st.error("Environment variables not loaded. Check your .env file.")
        return

    msg = EmailMessage()
    msg["Subject"] = f"Website Update Notification for {url}"
    msg["From"] = sender_email
    msg["To"] = to_email
    # Built line by line so the body does not inherit source indentation
    # from a triple-quoted literal.
    msg.set_content(
        f"Website: {url}\n"
        f"Title: {title}\n"
        f"Content (preview): {content[:200]}...\n"
        f"Tracked at: {timestamp}\n"
    )

    try:
        # The timeout prevents an unreachable server from blocking forever.
        with smtplib.SMTP(smtp_server, smtp_port, timeout=30) as server:
            server.starttls()
            server.login(sender_email, sender_password)
            server.send_message(msg)
        st.success(f"Notification email sent to {to_email}")
    except smtplib.SMTPException as e:
        st.error(f"SMTP Error: {e}")
    except OSError as e:
        # DNS failures, refused connections and timeouts are OSError, not
        # SMTPException.
        st.error(f"SMTP Error: {e}")
84
-
85
- # Fetch website data
86
def fetch_website_data(url):
    """Download a page and extract its title and paragraph text.

    Args:
        url: Address of the page to fetch (10-second timeout).

    Returns:
        A ``(title, content)`` tuple. ``title`` falls back to
        'No title available' and ``content`` to 'New Notification available'
        when the page has no title or no ``<p>`` elements. On a request
        error the failure is shown via ``st.error`` and a fixed pair of
        placeholder strings is returned.
    """
    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        st.error(f"Error fetching website data: {e}")
        return "Error occurred", "New notification detected. No content available due to an error."

    soup = BeautifulSoup(response.text, 'html.parser')

    title_tag = soup.title
    if title_tag is None:
        title = 'No title available'
    elif title_tag.string is not None:
        title = title_tag.string.strip()
    else:
        # .string is None when <title> is empty or has nested markup;
        # fall back to the combined text instead of crashing on .strip().
        title = title_tag.get_text(strip=True) or 'No title available'

    paragraphs = soup.find_all('p')
    if paragraphs:
        content = ' '.join(p.text.strip() for p in paragraphs)
    else:
        content = 'New Notification available'
    return title, content
98
-
99
- # Track websites and store updates in Google Sheets
100
def track_websites(urls, recipient_email, interval=60, max_duration=20*60):
    """Poll the given pages and record/notify whenever one changes.

    Runs until ``stop_event`` is set or ``max_duration`` seconds have
    elapsed. Every time a page's ``(title, content)`` differs from the last
    value seen (including the first time it is seen), a row is appended to
    Google Sheets and a notification email is sent.

    Args:
        urls: Iterable of page URLs to poll.
        recipient_email: Address that receives update notifications.
        interval: Seconds to wait between polling rounds.
        max_duration: Maximum total tracking time in seconds.
    """
    st.write(f"Started tracking for {recipient_email}")
    service = authenticate_google_sheets()
    last_updates = {}  # url -> last (title, content) seen

    end_time = datetime.now() + timedelta(seconds=max_duration)

    while not stop_event.is_set() and datetime.now() < end_time:
        for url in urls:
            # NOTE(review): fetch_website_data returns non-empty placeholder
            # strings on errors, so a failed fetch is treated as an update
            # here. Confirm whether that is intended.
            title, content = fetch_website_data(url)
            if not (title and content):
                continue

            snapshot = (title, content)
            if last_updates.get(url) == snapshot:
                continue  # unchanged since the previous round
            last_updates[url] = snapshot

            timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
            append_to_google_sheets(service, url, title, content, timestamp)

            try:
                send_email_notification(recipient_email, url, title, content, timestamp)
            except Exception as e:
                st.error(f"Error sending email notification: {e}")

        # Sleep until the next round, but never past the deadline. wait()
        # returns early when stop_event is set.
        remaining = (end_time - datetime.now()).total_seconds()
        if remaining <= 0:
            break
        stop_event.wait(min(interval, remaining))

    # Report the actual reason and duration instead of a fixed "20 minutes".
    if stop_event.is_set():
        st.write("Tracking stopped on request.")
    else:
        st.write(f"Stopped tracking after {max_duration / 60:g} minutes.")
131
-
132
- # Display tracking status
133
def display_tracking_status():
    """Render the tracking control page.

    Shows an email input. Once an address is entered, a "Tracking" button
    starts track_websites() in a daemon thread (unless one is already
    alive) and a "Stop Tracking" button signals that thread to stop and
    waits for it to finish. The thread handle is kept in
    ``st.session_state["tracking_thread"]``.
    """
    st.title("Update Tracking System with Notifications")

    email_input = st.text_input("Enter your email for notifications:")

    # Make sure the session slot for the worker thread exists.
    state = st.session_state
    if "tracking_thread" not in state:
        state["tracking_thread"] = None

    # The controls are only shown after an email address is provided.
    if not email_input:
        return

    if st.button("Tracking"):
        worker = state["tracking_thread"]
        if worker is not None and worker.is_alive():
            st.warning("Tracking Updates is already running.")
        else:
            stop_event.clear()  # allow a fresh tracking run
            worker = threading.Thread(
                target=track_websites,
                args=(TRACKING_URLS, email_input),
                daemon=True,
            )
            worker.start()
            state["tracking_thread"] = worker
            st.success(f"Notifications will be sent to {email_input}.")

    if st.button("Stop Tracking"):
        worker = state["tracking_thread"]
        if worker is None or not worker.is_alive():
            st.warning("No active tracking to stop.")
        else:
            stop_event.set()  # ask the worker to exit
            state["tracking_thread"].join()  # block until it has finished
            state["tracking_thread"] = None
            st.success("Tracking stopped.")
163
-
164
- # Main function
165
def main():
    """Entry point: render the tracking status page."""
    display_tracking_status()
167
-
168
- if __name__ == "__main__":
169
- main()