MeetInCode committed on
Commit
ff17e47
·
1 Parent(s): 815a1eb

Add application file

Browse files
Files changed (1) hide show
  1. app.py +281 -0
app.py ADDED
@@ -0,0 +1,281 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import json
3
+ import torch
4
+ from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM, TFAutoModelForSeq2SeqLM
5
+
6
+ # --- Model Loading ---
7
+ # Summarization model (BART)
8
def load_summarizer():
    """Build the BART-based log-summarization pipeline.

    Loads the fine-tuned TF seq2seq checkpoint and its tokenizer, then
    wraps both in a `summarization` pipeline placed on GPU 0 when CUDA
    is available, otherwise on CPU (device=-1).
    """
    checkpoint = "VidhuMathur/bart-log-summarization"
    seq2seq = TFAutoModelForSeq2SeqLM.from_pretrained(checkpoint)
    tok = AutoTokenizer.from_pretrained(checkpoint)
    target_device = 0 if torch.cuda.is_available() else -1
    return pipeline(
        "summarization",
        model=seq2seq,
        tokenizer=tok,
        device=target_device,
    )
19
+
20
+ # Causal LM for analysis (Qwen)
21
def load_qwen():
    """Load the Qwen3-0.6B causal LM and its tokenizer.

    Uses float16 on CUDA and float32 on CPU, moves the model to the
    selected device, and guarantees the tokenizer has a pad token
    (falling back to EOS).

    Returns:
        (model, tokenizer) pair ready for generation.
    """
    checkpoint = "Qwen/Qwen3-0.6B"
    tok = AutoTokenizer.from_pretrained(checkpoint)
    use_cuda = torch.cuda.is_available()
    lm = AutoModelForCausalLM.from_pretrained(
        checkpoint,
        torch_dtype=torch.float16 if use_cuda else torch.float32,
    ).to("cuda" if use_cuda else "cpu")
    if tok.pad_token is None:
        tok.pad_token = tok.eos_token
    return lm, tok
33
+
34
+ # --- Core Pipeline Functions ---
35
def extract_json_simple(text):
    """Return the first brace-balanced ``{...}`` substring of *text*, or None.

    Scans forward from the first ``{`` tracking nesting depth, and stops
    at the matching ``}``. Returns None when no opening brace exists or
    the braces never balance. Note: braces inside JSON string values are
    not treated specially.
    """
    opening = text.find('{')
    if opening == -1:
        return None
    depth = 0
    for idx in range(opening, len(text)):
        ch = text[idx]
        if ch == '{':
            depth += 1
        elif ch == '}':
            depth -= 1
            if depth == 0:
                return text[opening:idx + 1]
    return None
52
+
53
def ensure_required_keys(analysis, summary):
    """Fill missing or empty fields of *analysis* with generic defaults.

    Mutates and returns *analysis* so that every required key
    (root_cause, debugging_steps, debug_commands, useful_links) holds a
    non-empty value. *summary* seeds the default root-cause text
    (truncated to 100 characters).

    Fix vs. original: the separate ``elif isinstance(..., list) and
    len(...) == 0`` branch was unreachable — an empty list is falsy and
    was already caught by ``not analysis[key]`` — so the check collapses
    to a single truthiness test via ``dict.get``.
    """
    required_keys = {
        "root_cause": f"Issue identified from log analysis: {summary[:100]}...",
        "debugging_steps": [
            "Check system logs for error patterns",
            "Verify service status and configuration",
            "Test connectivity and permissions"
        ],
        "debug_commands": [
            "systemctl status service-name",
            "journalctl -u service-name -n 50",
            "netstat -tlnp | grep port"
        ],
        "useful_links": [
            "https://docs.system-docs.com/troubleshooting",
            "https://stackoverflow.com/questions/tagged/debugging"
        ]
    }
    for key, default_value in required_keys.items():
        # get() returns None for a missing key; None, "", and [] are all
        # falsy, matching the original's missing/empty semantics.
        if not analysis.get(key):
            analysis[key] = default_value
    return analysis
77
+
78
def create_fallback_analysis(summary):
    """Heuristic diagnosis used when the LLM produced no valid JSON.

    Picks a canned playbook by scanning *summary* (case-insensitively)
    for keyword families, checked in priority order:
    database -> memory -> disk -> generic fallback.
    """
    text = summary.lower()
    playbooks = [
        (
            ('database', 'connection', 'sql'),
            {
                "root_cause": "Database connection issue detected in the logs",
                "debugging_steps": [
                    "Check if database service is running",
                    "Verify database connection parameters",
                    "Test network connectivity to database server",
                    "Check database user permissions"
                ],
                "debug_commands": [
                    "sudo systemctl status postgresql",
                    "netstat -an | grep 5432",
                    "psql -U username -h host -d database",
                    "ping database-host"
                ],
                "useful_links": [
                    "https://www.postgresql.org/docs/current/runtime.html",
                    "https://dev.mysql.com/doc/refman/8.0/en/troubleshooting.html"
                ]
            },
        ),
        (
            ('memory', 'heap', 'oom'),
            {
                "root_cause": "Memory exhaustion or memory leak detected",
                "debugging_steps": [
                    "Monitor current memory usage",
                    "Check for memory leaks in application",
                    "Review JVM heap settings if Java application",
                    "Analyze memory dump if available"
                ],
                "debug_commands": [
                    "free -h",
                    "top -o %MEM",
                    "jstat -gc PID",
                    "ps aux --sort=-%mem | head"
                ],
                "useful_links": [
                    "https://docs.oracle.com/javase/8/docs/technotes/guides/troubleshoot/memleaks.html",
                    "https://linux.die.net/man/1/free"
                ]
            },
        ),
        (
            ('disk', 'space', 'full'),
            {
                "root_cause": "Disk space exhaustion causing system issues",
                "debugging_steps": [
                    "Check disk usage across all filesystems",
                    "Identify largest files and directories",
                    "Clean up temporary files and logs",
                    "Check for deleted files held by processes"
                ],
                "debug_commands": [
                    "df -h",
                    "du -sh /* | sort -hr",
                    "find /var/log -type f -size +100M",
                    "lsof +L1"
                ],
                "useful_links": [
                    "https://linux.die.net/man/1/df",
                    "https://www.cyberciti.biz/faq/linux-check-disk-space-command/"
                ]
            },
        ),
    ]
    for keywords, playbook in playbooks:
        if any(word in text for word in keywords):
            return playbook
    # Nothing matched: generic advice seeded with the summary itself.
    return {
        "root_cause": f"System issue detected: {summary[:100]}...",
        "debugging_steps": [
            "Review complete error logs",
            "Check system resource usage",
            "Verify service configurations",
            "Test system connectivity"
        ],
        "debug_commands": [
            "systemctl --failed",
            "journalctl -p err -n 50",
            "htop",
            "netstat -tlnp"
        ],
        "useful_links": [
            "https://linux.die.net/man/1/systemctl",
            "https://www.freedesktop.org/software/systemd/man/journalctl.html"
        ]
    }
160
+
161
def log_processing_pipeline(raw_log, summarizer, model, tokenizer):
    """Summarize *raw_log* with BART, then ask the causal LM for a JSON diagnosis.

    Args:
        raw_log: full log text to analyze.
        summarizer: transformers summarization pipeline (see load_summarizer).
        model: causal LM used for analysis (see load_qwen).
        tokenizer: tokenizer paired with *model*.

    Returns:
        dict with keys 'raw_log', 'summary', 'analysis', 'success', 'errors'.
        'success' ends up True whenever a summary was produced — if the LLM
        never yields parseable JSON, a keyword-based fallback analysis is
        substituted instead of failing.
    """
    results = {
        'raw_log': raw_log,
        'summary': None,
        'analysis': None,
        'success': False,
        'errors': []
    }
    # Step 1: Summarization — failure here is fatal; return immediately
    # with success=False and the error recorded.
    try:
        summary_result = summarizer(raw_log, max_length=350, min_length=40, do_sample=False)
        summary_text = summary_result[0]['summary_text']
        results['summary'] = summary_text
    except Exception as e:
        results['errors'].append(f"Summarization failed: {e}")
        return results
    # Step 2: Analysis — up to max_attempts generations, accepting the first
    # response that contains a brace-balanced, parseable JSON object.
    success = False
    attempts = 0
    max_attempts = 2
    while not success and attempts < max_attempts:
        attempts += 1
        prompt = f"""Analyze this log summary and respond with ONLY a JSON object:\n\nLog: {summary_text}\n\nRequired JSON format:\n{{\n \"root_cause\": \"explain the main problem\",\n \"debugging_steps\": [\"step 1\", \"step 2\", \"step 3\"],\n \"debug_commands\": [\"command1\", \"command2\", \"command3\"],\n \"useful_links\": [\"link1\", \"link2\"]\n}}\n\nJSON:"""
        try:
            # Truncate the prompt to 800 tokens so prompt + 300 new tokens
            # stays within the model's context budget.
            inputs = tokenizer(prompt, return_tensors="pt", max_length=800, truncation=True)
            device = next(model.parameters()).device
            inputs = {k: v.to(device) for k, v in inputs.items()}
            with torch.no_grad():
                outputs = model.generate(
                    **inputs,
                    max_new_tokens=300,
                    temperature=0.2,
                    do_sample=True,
                    pad_token_id=tokenizer.eos_token_id,
                    eos_token_id=tokenizer.eos_token_id,
                    repetition_penalty=1.1
                )
            # Decode only the newly generated tokens, skipping the echoed prompt.
            response = tokenizer.decode(outputs[0][inputs['input_ids'].shape[1]:], skip_special_tokens=True)
            json_str = extract_json_simple(response)
            if json_str:
                try:
                    parsed = json.loads(json_str)
                    fixed_analysis = ensure_required_keys(parsed, summary_text)
                    results['analysis'] = fixed_analysis
                    results['success'] = True
                    success = True
                except json.JSONDecodeError:
                    # Errors are only recorded once all retries are spent.
                    if attempts == max_attempts:
                        results['errors'].append(f"JSON parsing failed after {attempts} attempts")
            else:
                if attempts == max_attempts:
                    results['errors'].append("No valid JSON found in response")
        except Exception as e:
            if attempts == max_attempts:
                results['errors'].append(f"Generation failed: {e}")
    # Guarantee a usable analysis: fall back to keyword heuristics and
    # still report success so the UI can render something helpful.
    if not results['success']:
        results['analysis'] = create_fallback_analysis(summary_text)
        results['success'] = True
        results['errors'].append("Used fallback analysis due to model issues")
    return results
221
+
222
+ # --- Gradio Interface ---
223
def process_log_file(file_obj, summarizer, model, tokenizer):
    """Gradio callback: read an uploaded log file and run the analysis pipeline.

    Args:
        file_obj: the uploaded file — a str path (gr.File type="filepath")
            or an object exposing `.name` (older Gradio tempfile wrapper).
        summarizer, model, tokenizer: loaded models forwarded to
            log_processing_pipeline.

    Returns:
        A 7-tuple matching the interface's output components:
        (status, summary, root cause, debugging steps, debug commands,
        useful links, full JSON dump).

    Fixes vs. original:
      * Every error path now returns 7 values; the original returned
        5-tuples, which breaks the 7-component Gradio output mapping.
      * With type="filepath" Gradio passes a plain str, so the original
        `file_obj.name` raised AttributeError on every upload; both str
        and wrapper inputs are now accepted.
    """
    EMPTY = ("", "", "", "", "", "")  # the 6 non-status output slots
    if file_obj is None:
        return ("No file uploaded",) + EMPTY
    try:
        # Accept both a str path and a tempfile-like wrapper with .name.
        path = file_obj if isinstance(file_obj, str) else file_obj.name
        # Try common encodings in order; first one that decodes wins.
        log_content = None
        for encoding in ('utf-8', 'latin-1', 'cp1252', 'iso-8859-1'):
            try:
                with open(path, 'r', encoding=encoding) as f:
                    log_content = f.read()
                break
            except UnicodeDecodeError:
                continue
        if log_content is None:
            return ("Encoding error",) + EMPTY
        if not log_content.strip():
            return ("Empty file",) + EMPTY
        # Cap input size to keep tokenization and summarization bounded.
        if len(log_content) > 100000:
            log_content = log_content[:100000] + "\n... (file truncated)"
        results = log_processing_pipeline(log_content, summarizer, model, tokenizer)
        if results['success']:
            analysis = results['analysis']
            return (
                "Analysis complete",
                results['summary'],
                analysis.get('root_cause', ''),
                '\n'.join(analysis.get('debugging_steps', [])),
                '\n'.join(analysis.get('debug_commands', [])),
                '\n'.join(analysis.get('useful_links', [])),
                json.dumps(results, indent=2),
            )
        return ("Analysis failed",) + EMPTY
    except Exception as e:
        return (f"Processing error: {str(e)}",) + EMPTY
258
+
259
def main():
    """Build and launch the Gradio UI for the log analyzer.

    Loads both models once at startup, wires a single button to the
    processing callback, and blocks on `app.launch()`.
    """
    summarizer = load_summarizer()
    model, tokenizer = load_qwen()
    with gr.Blocks(title="Minimal LogLens") as app:
        gr.Markdown("# Minimal LogLens Log Analyzer")
        # NOTE(review): type="filepath" makes Gradio pass a plain str to the
        # callback, but process_log_file reads `file_obj.name` — confirm
        # against the installed Gradio version.
        file_input = gr.File(label="Upload Log File", file_types=[".txt", ".log", ".out", ".err"], type="filepath")
        analyze_btn = gr.Button("Analyze Log")
        status = gr.Textbox(label="Status", interactive=False)
        summary = gr.Textbox(label="Summary", lines=3, interactive=False)
        root_cause = gr.Textbox(label="Root Cause", lines=2, interactive=False)
        debug_steps = gr.Textbox(label="Debugging Steps", lines=4, interactive=False)
        debug_commands = gr.Textbox(label="Debug Commands", lines=4, interactive=False)
        useful_links = gr.Textbox(label="Useful Links", lines=2, interactive=False)
        json_output = gr.Code(label="Full JSON Output", language="json", interactive=False)
        # Lambda closes over the loaded models so the callback matches the
        # single-input signature Gradio expects here.
        analyze_btn.click(
            fn=lambda f: process_log_file(f, summarizer, model, tokenizer),
            inputs=file_input,
            outputs=[status, summary, root_cause, debug_steps, debug_commands, useful_links, json_output]
        )
    app.launch()

if __name__ == "__main__":
    main()