logging changes
- app.py +54 -11
- utils/logger.py +137 -0
app.py
CHANGED
@@ -9,7 +9,33 @@ from utils.retriever import retrieve_paragraphs
 from utils.generator import generate
 import json
 import ast
+from utils.logger import ChatLogger
+from pathlib import Path
+from huggingface_hub import CommitScheduler, HfApi
+import os
+# fetch tokens from Gradio secrets
+SPACES_LOG = os.environ.get("EUDR_SPACES_LOG")
+if not SPACES_LOG:
+    raise ValueError("EUDR_SPACES_LOG not found in environment")
+# create the local logs repo
+JSON_DATASET_DIR = Path("json_dataset")
+JSON_DATASET_DIR.mkdir(parents=True, exist_ok=True)
+JSON_DATASET_PATH = JSON_DATASET_DIR / f"logs-{uuid4()}.json"
 
+# the logs are written to dataset repo periodically from local logs
+# https://huggingface.co/spaces/Wauplin/space_to_dataset_saver
+scheduler = CommitScheduler(
+    repo_id="GIZ/spaces_logs",
+    repo_type="dataset",
+    folder_path=JSON_DATASET_DIR,
+    path_in_repo="eudr_chatbot",
+    token=SPACES_LOG)
+
+
+
+# Initialize logger with shared scheduler
+# scheduler.start()  # Start the scheduler
+chat_logger = ChatLogger(scheduler=scheduler)
 # Sample questions for examples
 SAMPLE_QUESTIONS = {
     "Análisis de la deforestación": [
@@ -73,7 +99,7 @@ def make_html_source(source,i):
 
     return card
 
-async def chat_response(query, history, method, country, uploaded_file):
+async def chat_response(query, history, method, country, uploaded_file, request=None):
     """Generate chat response based on method and inputs"""
 
     # Skip processing if this is an auto-generated file analysis message
@@ -128,6 +154,14 @@ async def chat_response(query, history, method, country, uploaded_file):
         docs_html = "".join(docs_html)
         response = await generate(query=query, context=retrieved_paragraphs)
 
+        # Log the interaction
+        chat_logger.log(
+            query=query,
+            answer=response,
+            retrieved_content=context_retrieved_lst,
+            request=request
+        )
+
     except Exception as e:
         response = f"Error retrieving information: {str(e)}"
 
@@ -491,24 +525,33 @@ with gr.Blocks(title="EUDR Bot", theme=theme, css="style.css") as demo:
         outputs=[textbox]
     )
 
-
-
-
-
-
-
-
+
+    # Feedback buttons
+    def log_feedback(feedback, chatbot):
+        # Get the last interaction from chatbot history
+        if chatbot and len(chatbot) > 0:
+            last_query, last_response = chatbot[-1]
+            chat_logger.log(
+                query=last_query,
+                answer=last_response,
+                retrieved_content=[],  # We don't have access to the original retrieved content here
+                feedback=feedback
+            )
+        return (gr.update(visible=False), gr.update(visible=True))
+
     # Feedback buttons
     okay_btn.click(
-        lambda: (
+        lambda chatbot: log_feedback("positive", chatbot),
+        inputs=[chatbot],
        outputs=[feedback_row, feedback_thanks]
     )
 
     not_okay_btn.click(
-        lambda: (
+        lambda chatbot: log_feedback("negative", chatbot),
+        inputs=[chatbot],
        outputs=[feedback_row, feedback_thanks]
     )
 
 # Launch the app
 if __name__ == "__main__":
-    demo.launch()
+    demo.launch()
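For context, the block added at the top of app.py follows the standard Space-to-dataset logging pattern: a CommitScheduler watches a local folder and periodically commits any new or changed files to a dataset repo, while writers append under scheduler.lock so a write never overlaps an in-flight upload. Below is a minimal standalone sketch of that pattern, assuming huggingface_hub's CommitScheduler; the repo id, folder name, token placeholder, and the append_log helper are illustrative only and not part of this commit.

from pathlib import Path
from uuid import uuid4
import json

from huggingface_hub import CommitScheduler

# Local folder that the scheduler will mirror to the dataset repo
LOG_DIR = Path("json_dataset")
LOG_DIR.mkdir(parents=True, exist_ok=True)
LOG_FILE = LOG_DIR / f"logs-{uuid4()}.jsonl"

# Pushes new/changed files in LOG_DIR to the dataset repo roughly every 5 minutes
scheduler = CommitScheduler(
    repo_id="your-org/your-logs-dataset",  # placeholder repo, not the real one
    repo_type="dataset",
    folder_path=LOG_DIR,
    path_in_repo="my_app",                 # placeholder subfolder
    every=5,
    token="hf_...",                        # a write token, e.g. from a Space secret
)

def append_log(record: dict) -> None:
    # Hold the scheduler's lock so the append never races with an upload
    with scheduler.lock:
        with LOG_FILE.open("a") as f:
            f.write(json.dumps(record) + "\n")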
utils/logger.py
ADDED
@@ -0,0 +1,137 @@
+import json
+import logging
+from datetime import datetime
+from uuid import uuid4
+import requests
+from pathlib import Path
+from datasets import load_dataset, Dataset
+import os
+from huggingface_hub import CommitScheduler, HfApi
+import random
+
+class ChatLogger:
+    def __init__(self, scheduler):
+        """Initialize the chat logger with paths and configurations"""
+        if not scheduler:
+            raise ValueError("Scheduler is required")
+
+        self.scheduler = scheduler
+        self.json_dataset_dir = scheduler.folder_path
+        self.logs_path = self.json_dataset_dir / f"logs-{uuid4()}.jsonl"
+
+    def get_client_ip(self, request=None):
+        """Get the client IP address from the request context"""
+        try:
+            if request:
+                # Try different headers that might contain the real IP
+                ip = request.client.host
+                # Check for proxy headers
+                forwarded_for = request.headers.get('X-Forwarded-For')
+                if forwarded_for:
+                    # X-Forwarded-For can contain multiple IPs - first one is the client
+                    ip = forwarded_for.split(',')[0].strip()
+
+                logging.debug(f"Client IP detected: {ip}")
+                return ip
+        except Exception as e:
+            logging.error(f"Error getting client IP: {e}")
+        return "127.0.0.1"
+
+    def get_client_location(self, ip_address):
+        """Get geolocation info using ipapi.co"""
+        headers = {
+            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
+        }
+        try:
+            response = requests.get(
+                f'https://ipapi.co/{ip_address}/json/',
+                headers=headers,
+                timeout=5
+            )
+            if response.status_code == 200:
+                data = response.json()
+                # Add random noise between -0.01 and 0.01 degrees (roughly ±1km)
+                lat = data.get('latitude')
+                lon = data.get('longitude')
+                if lat is not None and lon is not None:
+                    lat += random.uniform(-0.01, 0.01)
+                    lon += random.uniform(-0.01, 0.01)
+
+                return {
+                    'city': data.get('city'),
+                    'region': data.get('region'),
+                    'country': data.get('country_name'),
+                    'latitude': lat,
+                    'longitude': lon
+                }
+            elif response.status_code == 429:
+                logging.warning(f"Rate limit exceeded for IP lookup")
+                return None
+            else:
+                logging.error(f"Error in IP lookup: Status code {response.status_code}")
+                return None
+
+        except requests.exceptions.RequestException as e:
+            logging.error(f"Request failed in IP lookup: {str(e)}")
+            return None
+
+    def create_log_entry(self, query, answer, retrieved_content, feedback=None, request=None):
+        """Create a structured log entry with all required fields"""
+        timestamp = datetime.now().timestamp()
+
+        # Get client location if request is provided
+        ip = self.get_client_ip(request) if request else None
+        location = self.get_client_location(ip) if ip else None
+
+        log_entry = {
+            "record_id": str(uuid4()),
+            "session_id": str(uuid4()),  # In practice, this should be passed in from the session
+            "time": str(timestamp),
+            "client_location": location,
+            "question": query,
+            "answer": answer,
+            "retrieved_content": retrieved_content if isinstance(retrieved_content, list) else [retrieved_content],
+            "feedback": feedback
+        }
+
+        return log_entry
+
+    def save_local(self, log_entry):
+        """Save log entry to local JSONL file"""
+        try:
+            # Reorder fields for consistency
+            field_order = [
+                "record_id",
+                "session_id",
+                "time",
+                "client_location",
+                "question",
+                "answer",
+                "retrieved_content",
+                "feedback"
+            ]
+            ordered_logs = {k: log_entry.get(k) for k in field_order if k in log_entry}
+
+            with self.scheduler.lock:
+                with open(self.logs_path, 'a') as f:
+                    json.dump(ordered_logs, f)
+                    f.write('\n')
+            logging.info("Log entry saved")
+            return True
+        except Exception as e:
+            logging.error(f"Error saving to local file: {str(e)}")
+            return False
+
+    def log(self, query, answer, retrieved_content, feedback=None, request=None):
+        """Main logging method that handles both local and HF storage"""
+        # Create log entry
+        log_entry = self.create_log_entry(
+            query=query,
+            answer=answer,
+            retrieved_content=retrieved_content,
+            feedback=feedback,
+            request=request
+        )
+
+        # Save locally with thread safety
+        return self.save_local(log_entry)
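One wiring detail the diff leaves implicit: get_client_ip() expects a request object exposing request.client.host and request.headers, which Gradio supplies when an event handler declares a parameter annotated with gr.Request. A minimal sketch of such a handler is shown below; the respond function, its body, and the use of an existing chat_logger instance are illustrative assumptions, not part of this commit.

import gradio as gr

# chat_logger is assumed to be the ChatLogger instance created in app.py
def respond(message, history, request: gr.Request):
    answer = "..."  # call the retrieval/generation pipeline here
    chat_logger.log(
        query=message,
        answer=answer,
        retrieved_content=[],   # pass the retrieved paragraphs when available
        request=request,        # lets ChatLogger read request.client.host / X-Forwarded-For
    )
    return answer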