Terry Zhuo committed on
Commit
7c88c71
·
1 Parent(s): fd079ad
Files changed (3) hide show
  1. app.py +145 -0
  2. log_reader.py +118 -0
  3. requirements.txt +3 -0
app.py ADDED
@@ -0,0 +1,145 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ import os
3
+ from datetime import datetime, timedelta
4
+ import gradio as gr
5
+ import json
6
+ from log_reader import RemoteLogReader
7
+
8
+ # logging.basicConfig(level=logging.INFO)
9
+ # log = logging.getLogger(__name__)
10
+
11
def _load_json_object(line: str):
    """Return the JSON object encoded by *line*, or None if it is not one.

    Both malformed JSON and valid JSON that is not an object (a list, a
    number, a bare string, ...) yield None, so callers can use ``.get``
    on the result without further type checks.
    """
    try:
        record = json.loads(line)
    except json.JSONDecodeError:
        return None
    return record if isinstance(record, dict) else None


def get_file_data(content: str) -> "tuple[str | None, bool]":
    """Extract the IP and the vote-condition flag from a conversation log.

    Args:
        content: Newline-separated log text, one JSON record per line.
            Blank lines are ignored.

    Returns:
        A ``(ip, vote_conditions_met)`` pair. ``ip`` is the ``'ip'`` field
        of the first non-blank line, or None when that line is not a JSON
        object or has no such field. ``vote_conditions_met`` is True only
        when the last non-blank line is a JSON object whose ``'type'`` is
        ``'vote'`` and whose ``'feedback'`` is a dict with exactly six
        entries. ``(None, False)`` is returned for empty content or when
        the content cannot be processed at all.
    """
    try:
        lines = [line.strip() for line in content.split('\n') if line.strip()]
        if not lines:
            return None, False

        # The two lines are parsed independently so that a bad first line
        # does not discard a valid vote on the last line, and vice versa.
        first_record = _load_json_object(lines[0])
        ip = first_record.get('ip') if first_record is not None else None

        last_record = _load_json_object(lines[-1])
        if last_record is None:
            vote_conditions_met = False
        else:
            feedback = last_record.get('feedback')
            vote_conditions_met = (
                last_record.get('type') == 'vote'
                and isinstance(feedback, dict)
                and len(feedback) == 6
            )

        return ip, vote_conditions_met
    except Exception as e:
        # Best-effort boundary: e.g. *content* is not a string.
        logging.error(f"Error processing file content: {e}")
        return None, False
39
+
40
def _conversation_contains(logs, search_query: str) -> bool:
    """Return True if any message in *logs* contains *search_query*.

    Each log entry is inspected for ``entry["state"]["messages"]``; entries
    or messages that do not have the expected shape are skipped instead of
    raising, so one odd record cannot abort the search.
    """
    for entry in logs:
        state = entry.get("state") if isinstance(entry, dict) else None
        if not isinstance(state, dict):
            continue

        for item in state.get("messages") or []:
            # Messages are unpacked as two-element pairs; the second
            # element is what gets searched. Anything else is skipped.
            if not isinstance(item, (list, tuple)) or len(item) != 2:
                continue
            message = item[1]
            try:
                if search_query in message:
                    return True
            except TypeError:
                # e.g. the message is None and supports no containment test.
                continue
    return False


def search_battle_anony(date_str: str, search_query: str) -> list[list]:
    """Search battle_anony conversations for a specific date and query.

    Args:
        date_str: Date passed unchanged to ``RemoteLogReader.get_conv_logs``.
        search_query: Text looked for inside each conversation's messages.

    Returns:
        One row per matching conversation, in the form
        ``[ip, conv_id, 'Yes' | 'No', 'Yes']``. The third column reports
        whether ``get_file_data`` considered the conversation a valid vote;
        the fourth is always ``'Yes'`` because only matches are returned.
        Conversations for which no IP could be determined are skipped.
        Errors are logged and whatever was collected so far is returned;
        the function does not raise.
    """
    results = []

    try:
        reader = RemoteLogReader()

        # Only the battle_anony mode is searched.
        conv_logs = reader.get_conv_logs(date_str)
        battle_anony_logs = conv_logs.get('battle_anony', {})

        for conv_id, logs in battle_anony_logs.items():
            # get_file_data works on file text, so re-serialize the parsed
            # records as one JSON document per line.
            content = '\n'.join(json.dumps(msg) for msg in logs)
            ip, is_valid = get_file_data(content)

            if not ip:
                continue

            if _conversation_contains(logs, search_query):
                # Rows are lists (not dicts) to match the results table.
                results.append([
                    ip,
                    conv_id,
                    'Yes' if is_valid else 'No',
                    'Yes',
                ])

    except Exception as e:
        logging.error(f"Error searching logs for date {date_str}: {e}")

    return results
91
+
92
def create_ui():
    """Build and return the Gradio Blocks app for the battle search UI.

    The page has a date picker, a query textbox, a search button and a
    read-only results table with the columns IP, Conversation ID,
    Is Valid and Contains Query.
    """

    def process_search(date, query):
        """Run the search for the selected date and return the table rows.

        Returns an empty list when the query is blank or no date has been
        selected, so the table is simply cleared instead of raising.
        """
        if not query or not query.strip():
            return []

        # NOTE(review): the date component presumably yields None when
        # nothing is selected - confirm against the Gradio docs.
        if date is None:
            return []

        # The component value is treated as a POSIX timestamp and turned
        # into the date format expected by the log search (YYYY_MM_DD).
        date_obj = datetime.fromtimestamp(date)
        date_str = date_obj.strftime("%Y_%m_%d")

        # search_battle_anony already returns [] when nothing matches.
        return search_battle_anony(date_str, query)

    with gr.Blocks(title="Battle Search") as app:
        gr.Markdown("# Battle Search")
        gr.Markdown("Search for specific content in battle_anony conversations by date and view IP addresses with validation status.")

        with gr.Row():
            date_input = gr.DateTime(
                label="Select Date",
                include_time=False,
            )

            query_input = gr.Textbox(
                label="Search Query",
                placeholder="Enter text to search for in conversations...",
                lines=1
            )

        with gr.Row():
            search_button = gr.Button("Search")

        with gr.Row():
            table_output = gr.DataFrame(
                headers=['IP', 'Conversation ID', 'Is Valid', 'Contains Query'],
                label="Results Table",
                interactive=False,
                value=[]  # Start with an empty table
            )

        search_button.click(
            fn=process_search,
            inputs=[date_input, query_input],
            outputs=[table_output]
        )

    return app
142
+
143
if __name__ == "__main__":
    # Script entry point: build the UI and start serving it.
    create_ui().launch()
log_reader.py ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ '''
2
+ Facade for reading logs on remote storage.
3
+ '''
4
+
5
+ from collections import defaultdict
6
+ import json
7
+ import os
8
+ from typing import Any
9
+ from azure.storage.fileshare import ShareServiceClient
10
+
11
+
12
class RemoteLogReader:
    """Reader for conversation logs, sandbox logs and images on a remote file share.

    All paths are resolved relative to the share selected in ``__init__``.
    """

    # Evaluated once, when the class body runs (i.e. at import time).
    LOG_CONNECTION_STRING = os.getenv("AZURE_STORAGE_CONNECTION_STRING") or ""
    LOG_SHARE_NAME = "swearenalogsfileshare"

    # Directory for storing user uploaded images.
    IMAGE_DIR_NAME = "serve_images"
    # Directory for conversation logs.
    CONV_LOG_DIR_NAME = "conv_logs"
    # Directory for sandbox logs.
    SANDBOX_LOG_DIR_NAME = "sandbox_logs"

    CHAT_MODES = ["battle_anony", "battle_named", "direct"]

    def __init__(
        self,
        connection_string: str = LOG_CONNECTION_STRING,
        share_name: str = LOG_SHARE_NAME,
    ):
        """Connect to the file share.

        Raises:
            ValueError: If *connection_string* or *share_name* is empty.
        """
        if not connection_string:
            raise ValueError("Connection string is required.")
        if not share_name:
            raise ValueError("Share name is required.")

        self.share_service = ShareServiceClient.from_connection_string(
            conn_str=connection_string)
        self.share_client = self.share_service.get_share_client(share=share_name)

    def is_conv_log(self, file_name: str) -> bool:
        """Return True for names of the form ``conv-log*.json``."""
        return file_name.startswith("conv-log") and file_name.endswith(".json")

    def get_conv_id_from_name(self, file_name: str) -> str:
        """Return the conversation id: the third ``-``-separated part of the name.

        A trailing ``.json`` extension is removed as a suffix. (``str.strip``
        would instead strip any of the characters ``.``, ``j``, ``s``, ``o``,
        ``n`` from both ends and could eat into the id itself.)
        """
        return file_name.split("-")[2].removesuffix(".json")

    def is_sandbox_log(self, file_name: str) -> bool:
        """Return True for names of the form ``sandbox-log*.json``."""
        return file_name.startswith("sandbox-log") and file_name.endswith(".json")

    def get_file_content(self, file_path: str) -> bytes:
        """Download the file at *file_path* and return its raw bytes."""
        file_client = self.share_client.get_file_client(file_path)
        file_content = file_client.download_file().readall()
        return file_content

    def get_conv_logs(self, date: str) -> dict[str, defaultdict[str, list[Any]]]:
        """Return conversation logs for *date*.

        Returns a dict: mode -> conv_id -> list of parsed log records.
        Every mode in ``CHAT_MODES`` is present as a key; modes whose
        directory does not exist for that date map to an empty dict.
        """
        conv_logs = {
            mode: defaultdict(list) for mode in self.CHAT_MODES
        }
        for mode in self.CHAT_MODES:
            conv_log_dir = f"{date}/{self.CONV_LOG_DIR_NAME}/{mode}/"
            # Skip modes that have no directory for this date.
            if not self.share_client.get_directory_client(conv_log_dir).exists():
                continue
            for file in self.share_client.list_directories_and_files(conv_log_dir):
                if not self.is_conv_log(file.name):
                    continue
                conv_id = self.get_conv_id_from_name(file.name)
                file_content = self.get_file_content(
                    conv_log_dir + file.name).decode("utf-8").strip(' \n')
                # Each non-blank line of the file is one JSON record.
                for line in file_content.split('\n'):
                    if line.strip():
                        conv_logs[mode][conv_id].append(json.loads(line))
        return conv_logs

    def get_sandbox_logs(self, date: str) -> list[Any]:
        """Return the parsed sandbox logs for *date*, one entry per log file.

        Returns an empty list when the date has no sandbox log directory,
        mirroring how ``get_conv_logs`` treats missing directories.
        """
        sandbox_logs = []
        sandbox_log_dir = f"{date}/{self.SANDBOX_LOG_DIR_NAME}/"
        if not self.share_client.get_directory_client(sandbox_log_dir).exists():
            return sandbox_logs
        for file in self.share_client.list_directories_and_files(sandbox_log_dir):
            if self.is_sandbox_log(file.name):
                file_content = self.get_file_content(
                    sandbox_log_dir + file.name).decode("utf-8").strip(' \n')
                # Unlike conversation logs, the whole file is one JSON document.
                sandbox_logs.append(json.loads(file_content))
        return sandbox_logs

    def get_image(self, image_id: str) -> bytes:
        """Return the raw bytes of ``<IMAGE_DIR_NAME>/<image_id>.png``."""
        image_path = f"{self.IMAGE_DIR_NAME}/{image_id}.png"
        return self.get_file_content(image_path)
107
+
108
+
109
if __name__ == "__main__":
    # Example usage: fetch one day's conversation logs and sandbox logs
    # plus a single image, then print each result in that order.
    reader = RemoteLogReader()
    day = "2025_02_20"
    fetched = (
        reader.get_conv_logs(day),
        reader.get_sandbox_logs(day),
        reader.get_image("051fdac24285ff6e219a9ba06d1ac843"),
    )
    for item in fetched:
        print(item)
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ gradio>=4.0.0
2
+ pandas>=2.0.0
3
+ azure-storage-file-share