Raj-Maharajwala committed on
Commit adab281 · verified · 1 Parent(s): 31cebe0

Update README.md

Files changed (1):
  1. README.md +0 -30
README.md CHANGED
```diff
@@ -153,10 +153,8 @@ Memory: %(memory).2fMB
         # Add memory usage information
         if not hasattr(record, 'memory'):
             record.memory = psutil.Process().memory_info().rss / (1024 * 1024)
-
         log_fmt = self.FORMATS.get(record.levelno)
         formatter = logging.Formatter(log_fmt, datefmt='%Y-%m-%d %H:%M:%S')
-
         # Add performance metrics if available
         if hasattr(record, 'duration'):
             record.message = f"{record.message}\nDuration: {record.duration:.2f}s"
```
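The formatter above relies on two conventions: psutil supplies the process's resident memory for a `%(memory).2fMB` field, and a `duration` attribute (passed through logging's `extra` mechanism) is appended when present. A minimal self-contained sketch of the same pattern, with an illustrative format string (the README's `FORMATS` dict is not reproduced here):

```python
import logging
import psutil

class MemoryAwareFormatter(logging.Formatter):
    """Illustrative stand-in for the README's CustomFormatter."""
    def format(self, record: logging.LogRecord) -> str:
        # Attach resident memory in MB so '%(memory).2f' resolves in the format string
        if not hasattr(record, 'memory'):
            record.memory = psutil.Process().memory_info().rss / (1024 * 1024)
        message = super().format(record)
        # Append per-call timing when the caller supplied extra={'duration': ...}
        if hasattr(record, 'duration'):
            message += f"\nDuration: {record.duration:.2f}s"
        return message

handler = logging.StreamHandler()
handler.setFormatter(MemoryAwareFormatter(
    '%(asctime)s %(levelname)s %(message)s | Memory: %(memory).2fMB',
    datefmt='%Y-%m-%d %H:%M:%S'))
```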
```diff
@@ -169,13 +167,10 @@ def setup_logging(log_dir: str = "logs") -> logging.Logger:
     timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
     log_path = (Path(log_dir) / f"l_{timestamp}")
     log_path.mkdir(exist_ok=True)
-
-    # Create logger
     logger = logging.getLogger("InsuranceLLM")
     # Clear any existing handlers
     logger.handlers.clear()
     logger.setLevel(logging.DEBUG)
-
     # Create handlers with level-specific files
     handlers = {
         'debug': (logging.FileHandler(log_path / f"debug_{timestamp}.log"), logging.DEBUG),
```
```diff
@@ -189,15 +184,11 @@ def setup_logging(log_dir: str = "logs") -> logging.Logger:
             enable_link_path=True
         ), logging.INFO)
     }
-
-    # Configure handlers
     formatter = CustomFormatter()
     for (handler, level) in handlers.values():
         handler.setLevel(level)
         handler.setFormatter(formatter)
         logger.addHandler(handler)
-
-    # Log startup information (will now appear only once)
     logger.info(f"Starting new session {timestamp}")
     logger.info(f"Log directory: {log_dir}")
     return logger
```
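Once `setup_logging` returns, the level-specific handlers and the formatter above are already wired up. A hedged usage sketch (the `duration` key mirrors what the formatter looks for; the call site itself is hypothetical):

```python
import time

logger = setup_logging(log_dir="logs")

start = time.time()
# ... load the model, run a query, etc. ...
logger.info("Model loaded", extra={"duration": time.time() - start})
logger.debug("Also written to the debug_<timestamp>.log file")
```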
```diff
@@ -213,20 +204,16 @@ class PerformanceMetrics:
         self.tokens = 0
         self.response_times = []
         self.last_reset = self.start_time
-
     def reset_timer(self):
         """Reset the timer for individual response measurements"""
         self.last_reset = time.time()
-
     def update(self, tokens: int):
         self.tokens += tokens
         response_time = time.time() - self.last_reset
         self.response_times.append(response_time)
-
     @property
     def elapsed_time(self) -> float:
         return time.time() - self.start_time
-
     @property
     def last_response_time(self) -> float:
         return self.response_times[-1] if self.response_times else 0
```
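`PerformanceMetrics` pairs a cumulative clock (`start_time`) with a per-response one (`last_reset`): call `reset_timer` before generation and `update` after it. A short sketch, assuming the constructor sets `start_time = time.time()` as the surrounding code implies:

```python
metrics = PerformanceMetrics()

metrics.reset_timer()        # start timing a single response
# ... stream ~128 tokens from the model ...
metrics.update(tokens=128)   # record token count and response time

print(f"last response: {metrics.last_response_time:.2f}s, "
      f"total elapsed: {metrics.elapsed_time:.2f}s")
```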
```diff
@@ -317,7 +304,6 @@ class InsuranceLLM:
             "Assistant:"
         )
 
-
     def generate_response(self, prompt: str) -> Dict[str, Any]:
         if not self.llm_ctx:
             raise RuntimeError("Model not loaded. Call load_model() first.")
```
```diff
@@ -343,18 +329,10 @@ class InsuranceLLM:
                     text_chunk = chunk["choices"][0]["text"]
                     response["text"] += text_chunk
                     response["tokens"] += 1
-
-                    # Append to complete response
                     complete_response += text_chunk
-
-                    # Use simple print for streaming output
                     print(text_chunk, end="", flush=True)
-
-            # Print final newline
             print()
-
             return response
-
         except RuntimeError as e:
             if "llama_decode returned -3" in str(e):
                 self.logger.error("Memory allocation failed. Try reducing context window or batch size")
```
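The `chunk["choices"][0]["text"]` access matches llama-cpp-python's streaming completion format; outside the class, the same loop looks roughly like this (model path and prompt are placeholders):

```python
from llama_cpp import Llama

llm = Llama(model_path="path/to/model.gguf")  # placeholder path

response = {"text": "", "tokens": 0}
for chunk in llm("Q: What is a deductible?\nA:", max_tokens=128, stream=True):
    text_chunk = chunk["choices"][0]["text"]
    response["text"] += text_chunk
    response["tokens"] += 1
    print(text_chunk, end="", flush=True)  # stream to the terminal as tokens arrive
print()
```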
```diff
@@ -385,21 +363,14 @@ class InsuranceLLM:
                         question = parts[1].strip()
 
                     prompt = self.get_prompt(question, context)
-
-                    # Reset timer before generation
                     self.metrics.reset_timer()
-
-                    # Generate response
                     response = self.generate_response(prompt)
-
                     # Update metrics after generation
                     self.metrics.update(response["tokens"])
-
                     # Print metrics
                     console.print(f"[dim]Average tokens/sec: {response['tokens']/(self.metrics.last_response_time if self.metrics.last_response_time!=0 else 1):.2f} ||[/dim]",
                                   f"[dim]Tokens generated: {response['tokens']} ||[/dim]",
                                   f"[dim]Response time: {self.metrics.last_response_time:.2f}s[/dim]", end="\n\n\n")
-
                 except KeyboardInterrupt:
                     console.print("\n[yellow]Input interrupted. Type '/bye', 'exit', or 'quit' to quit.[/yellow]")
                     continue
```
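The tokens-per-second expression above guards against a zero response time inline; the same arithmetic reads more easily when unpacked (values here are illustrative):

```python
tokens, last_response_time = 128, 1.7   # illustrative values
elapsed = last_response_time or 1       # fall back to 1s to avoid division by zero
print(f"Average tokens/sec: {tokens / elapsed:.2f}")
```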
```diff
@@ -407,7 +378,6 @@ class InsuranceLLM:
                     self.logger.error(f"Error processing input: {str(e)}")
                     console.print(f"\n[red]Error: {str(e)}[/red]")
                     continue
-
         except Exception as e:
             self.logger.error(f"Fatal error in inference loop: {str(e)}")
             console.print(f"\n[red]Fatal error: {str(e)}[/red]")
```
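The `[red]…[/red]` and `[yellow]…[/yellow]` tags in these handlers are rich console markup, rendered as colored terminal output; a tiny self-contained example:

```python
from rich.console import Console

console = Console()
try:
    raise ValueError("example failure")
except Exception as e:
    console.print(f"\n[red]Error: {str(e)}[/red]")  # printed in red by rich
```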
 