Spaces:
GIZ
/
Running on CPU Upgrade

Romulan12 committed on
Commit
95c8547
·
1 Parent(s): c464ae6

logging changes

Browse files
Files changed (2) hide show
  1. app.py +54 -11
  2. utils/logger.py +137 -0
app.py CHANGED
@@ -9,7 +9,33 @@ from utils.retriever import retrieve_paragraphs
9
  from utils.generator import generate
10
  import json
11
  import ast
 
 
 
 
 
 
 
 
 
 
 
 
12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  # Sample questions for examples
14
  SAMPLE_QUESTIONS = {
15
  "Análisis de la deforestación": [
@@ -73,7 +99,7 @@ def make_html_source(source,i):
73
 
74
  return card
75
 
76
- async def chat_response(query, history, method, country, uploaded_file):
77
  """Generate chat response based on method and inputs"""
78
 
79
  # Skip processing if this is an auto-generated file analysis message
@@ -128,6 +154,14 @@ async def chat_response(query, history, method, country, uploaded_file):
128
  docs_html = "".join(docs_html)
129
  response = await generate(query=query, context=retrieved_paragraphs)
130
 
 
 
 
 
 
 
 
 
131
  except Exception as e:
132
  response = f"Error retrieving information: {str(e)}"
133
 
@@ -491,24 +525,33 @@ with gr.Blocks(title="EUDR Bot", theme=theme, css="style.css") as demo:
491
  outputs=[textbox]
492
  )
493
 
494
- # Sample questions dropdown
495
- dropdown_samples.change(
496
- change_sample_questions,
497
- [dropdown_samples],
498
- sample_groups
499
- )
500
-
 
 
 
 
 
 
 
501
  # Feedback buttons
502
  okay_btn.click(
503
- lambda: (gr.update(visible=False), gr.update(visible=True)),
 
504
  outputs=[feedback_row, feedback_thanks]
505
  )
506
 
507
  not_okay_btn.click(
508
- lambda: (gr.update(visible=False), gr.update(visible=True)),
 
509
  outputs=[feedback_row, feedback_thanks]
510
  )
511
 
512
  # Launch the app
513
  if __name__ == "__main__":
514
- demo.launch()
 
9
  from utils.generator import generate
10
  import json
11
  import ast
12
+ from utils.logger import ChatLogger
13
+ from pathlib import Path
14
+ from huggingface_hub import CommitScheduler, HfApi
15
+ import os
16
# Token used to push logs to the dataset repo. It is read from the
# EUDR_SPACES_LOG environment variable; the app refuses to start without it.
SPACES_LOG = os.environ.get("EUDR_SPACES_LOG")
if not SPACES_LOG:
    raise ValueError("EUDR_SPACES_LOG not found in environment")

# Local folder in which log files accumulate before being pushed.
JSON_DATASET_DIR = Path("json_dataset")
JSON_DATASET_DIR.mkdir(parents=True, exist_ok=True)

# Imported here so this statement does not depend on a `uuid` import elsewhere
# in the module; repeating an existing import is harmless.
from uuid import uuid4

# NOTE(review): ChatLogger derives its own "logs-<uuid>.jsonl" path from the
# scheduler folder, so this path looks unused -- confirm before removing.
JSON_DATASET_PATH = JSON_DATASET_DIR / f"logs-{uuid4()}.json"

# The logs are written to the dataset repo periodically from the local folder.
# https://huggingface.co/spaces/Wauplin/space_to_dataset_saver
scheduler = CommitScheduler(
    repo_id="GIZ/spaces_logs",
    repo_type="dataset",
    folder_path=JSON_DATASET_DIR,
    path_in_repo="eudr_chatbot",
    token=SPACES_LOG,
)

# No explicit start call is needed: CommitScheduler begins its background
# uploads as soon as it is instantiated. The logger shares this scheduler so
# that its writes use the scheduler's lock and folder.
chat_logger = ChatLogger(scheduler=scheduler)
39
  # Sample questions for examples
40
  SAMPLE_QUESTIONS = {
41
  "Análisis de la deforestación": [
 
99
 
100
  return card
101
 
102
+ async def chat_response(query, history, method, country, uploaded_file, request=None):
103
  """Generate chat response based on method and inputs"""
104
 
105
  # Skip processing if this is an auto-generated file analysis message
 
154
  docs_html = "".join(docs_html)
155
  response = await generate(query=query, context=retrieved_paragraphs)
156
 
157
+ # Log the interaction
158
+ chat_logger.log(
159
+ query=query,
160
+ answer=response,
161
+ retrieved_content=context_retrieved_lst,
162
+ request=request
163
+ )
164
+
165
  except Exception as e:
166
  response = f"Error retrieving information: {str(e)}"
167
 
 
525
  outputs=[textbox]
526
  )
527
 
528
+
529
+ # Feedback buttons
530
def log_feedback(feedback, chatbot):
    """Record user feedback for the latest exchange and swap the feedback UI.

    Args:
        feedback: Feedback label to store with the log record.
        chatbot: Chat history; its last item is unpacked as a
            ``(query, response)`` pair.

    Returns:
        A pair of ``gr.update`` objects: the first hides a component, the
        second shows one (wired by the callers to the feedback row and the
        thank-you message).
    """
    has_history = bool(chatbot) and len(chatbot) > 0
    if has_history:
        # Only the most recent exchange is attributed to the feedback.
        last_query, last_response = chatbot[-1]
        chat_logger.log(
            query=last_query,
            answer=last_response,
            # The originally retrieved content is not available at this point.
            retrieved_content=[],
            feedback=feedback,
        )

    hide_update = gr.update(visible=False)
    show_update = gr.update(visible=True)
    return (hide_update, show_update)
541
+
542
  # Feedback buttons
543
  okay_btn.click(
544
+ lambda chatbot: log_feedback("positive", chatbot),
545
+ inputs=[chatbot],
546
  outputs=[feedback_row, feedback_thanks]
547
  )
548
 
549
  not_okay_btn.click(
550
+ lambda chatbot: log_feedback("negative", chatbot),
551
+ inputs=[chatbot],
552
  outputs=[feedback_row, feedback_thanks]
553
  )
554
 
555
  # Launch the app
556
  if __name__ == "__main__":
557
+ demo.launch()
utils/logger.py ADDED
@@ -0,0 +1,137 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import logging
3
+ from datetime import datetime
4
+ from uuid import uuid4
5
+ import requests
6
+ from pathlib import Path
7
+ from datasets import load_dataset, Dataset
8
+ import os
9
+ from huggingface_hub import CommitScheduler, HfApi
10
+ import random
11
+
12
class ChatLogger:
    """Append chat interactions as JSON Lines inside a scheduler's folder.

    Every call to :meth:`log` builds one record (ids, timestamp, approximate
    client location, question, answer, retrieved content, feedback) and
    appends it to a per-instance ``logs-<uuid>.jsonl`` file located in
    ``scheduler.folder_path``. Writes happen while holding ``scheduler.lock``.
    """

    # Address returned when the client IP cannot be determined.
    FALLBACK_IP = "127.0.0.1"

    # Key order of every record written to the JSONL file.
    FIELD_ORDER = (
        "record_id",
        "session_id",
        "time",
        "client_location",
        "question",
        "answer",
        "retrieved_content",
        "feedback",
    )

    def __init__(self, scheduler):
        """Bind the logger to a scheduler and pick a unique log file.

        Args:
            scheduler: Object exposing ``folder_path`` (directory for the log
                file) and ``lock`` (context manager guarding writes).

        Raises:
            ValueError: If ``scheduler`` is missing or falsy.
        """
        if not scheduler:
            raise ValueError("Scheduler is required")

        self.scheduler = scheduler
        # Path() also accepts a folder_path given as a plain string.
        self.json_dataset_dir = Path(scheduler.folder_path)
        self.logs_path = self.json_dataset_dir / f"logs-{uuid4()}.jsonl"

    def get_client_ip(self, request=None):
        """Return the client IP for ``request``, or the fallback address.

        The first entry of ``X-Forwarded-For`` wins when present (behind a
        proxy it names the original client); otherwise ``request.client.host``
        is used. Any failure yields ``"127.0.0.1"``.

        Args:
            request: Object with ``headers`` (mapping) and ``client``
                (object with ``host``), or ``None``.
        """
        try:
            if request:
                ip = None
                headers = getattr(request, "headers", None)
                forwarded_for = headers.get('X-Forwarded-For') if headers else None
                if forwarded_for:
                    # X-Forwarded-For can contain multiple IPs - first one is the client
                    ip = forwarded_for.split(',')[0].strip()
                if not ip:
                    # `client` may be absent; read it defensively so a valid
                    # forwarded address above is never lost to an exception.
                    client = getattr(request, "client", None)
                    ip = getattr(client, "host", None)
                if ip:
                    logging.debug(f"Client IP detected: {ip}")
                    return ip
        except Exception as e:
            # Best effort: IP detection must never break the chat flow.
            logging.error(f"Error getting client IP: {e}")
        return self.FALLBACK_IP

    def get_client_location(self, ip_address):
        """Look up an approximate location for ``ip_address`` via ipapi.co.

        Args:
            ip_address: IP address string to geolocate.

        Returns:
            A dict with ``city``, ``region``, ``country``, ``latitude`` and
            ``longitude`` (numeric coordinates are jittered by up to 0.01
            degrees), or ``None`` when the lookup fails, is rate limited, or
            the service reports an error.
        """
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
        }
        try:
            response = requests.get(
                f'https://ipapi.co/{ip_address}/json/',
                headers=headers,
                timeout=5
            )
            if response.status_code == 429:
                logging.warning("Rate limit exceeded for IP lookup")
                return None
            if response.status_code != 200:
                logging.error(f"Error in IP lookup: Status code {response.status_code}")
                return None
            data = response.json()
        except (requests.exceptions.RequestException, ValueError) as e:
            # ValueError covers a non-JSON body on requests versions whose
            # JSON decode error is not a RequestException.
            logging.error(f"Request failed in IP lookup: {str(e)}")
            return None

        # NOTE(review): ipapi.co appears to flag failed lookups (e.g. reserved
        # addresses) with an "error" field in a 200 response -- confirm.
        if not isinstance(data, dict) or data.get('error'):
            logging.warning("IP lookup returned no usable location data")
            return None

        lat = data.get('latitude')
        lon = data.get('longitude')
        # Add random noise between -0.01 and 0.01 degrees (roughly ±1km);
        # skipped when the coordinates are missing or not numeric.
        if isinstance(lat, (int, float)) and isinstance(lon, (int, float)):
            lat += random.uniform(-0.01, 0.01)
            lon += random.uniform(-0.01, 0.01)

        return {
            'city': data.get('city'),
            'region': data.get('region'),
            'country': data.get('country_name'),
            'latitude': lat,
            'longitude': lon
        }

    def create_log_entry(self, query, answer, retrieved_content, feedback=None, request=None):
        """Build the record dict for one interaction.

        Args:
            query: User question.
            answer: Generated answer.
            retrieved_content: Retrieved context; a non-list value is wrapped
                in a single-element list.
            feedback: Optional feedback label.
            request: Optional request used to derive the client location.

        Returns:
            A dict with the keys listed in ``FIELD_ORDER``.
        """
        timestamp = datetime.now().timestamp()

        # The location is only looked up when a request is provided.
        ip = self.get_client_ip(request) if request else None
        location = self.get_client_location(ip) if ip else None

        if not isinstance(retrieved_content, list):
            retrieved_content = [retrieved_content]

        return {
            "record_id": str(uuid4()),
            # A fresh id per record; in practice this should be passed in
            # from the session.
            "session_id": str(uuid4()),
            "time": str(timestamp),
            "client_location": location,
            "question": query,
            "answer": answer,
            "retrieved_content": retrieved_content,
            "feedback": feedback
        }

    def save_local(self, log_entry):
        """Append ``log_entry`` as one line to the local JSONL file.

        Args:
            log_entry: Record dict; keys outside ``FIELD_ORDER`` are dropped.

        Returns:
            ``True`` when the line was written, ``False`` on any error.
        """
        try:
            # Reorder fields for consistency
            ordered_logs = {k: log_entry.get(k) for k in self.FIELD_ORDER if k in log_entry}

            # Serialise before touching the file: a value that cannot be
            # encoded must not leave a truncated record in the JSONL file.
            line = json.dumps(ordered_logs)

            with self.scheduler.lock:
                with open(self.logs_path, 'a', encoding='utf-8') as f:
                    f.write(line + '\n')
            logging.info("Log entry saved")
            return True
        except Exception as e:
            # Best effort: a logging failure is reported, never raised.
            logging.error(f"Error saving to local file: {str(e)}")
            return False

    def log(self, query, answer, retrieved_content, feedback=None, request=None):
        """Create a record for one interaction and append it to the log file.

        Args:
            query: User question.
            answer: Generated answer.
            retrieved_content: Retrieved context (list or single value).
            feedback: Optional feedback label.
            request: Optional request used to derive the client location.

        Returns:
            ``True`` if the record was saved locally, ``False`` otherwise.
        """
        log_entry = self.create_log_entry(
            query=query,
            answer=answer,
            retrieved_content=retrieved_content,
            feedback=feedback,
            request=request
        )

        # Save locally with thread safety
        return self.save_local(log_entry)