Ali2206 committed
Commit b9a61ea · verified · 1 Parent(s): acf78d1

Update app.py

Files changed (1): app.py (+42, -26)
app.py CHANGED
@@ -17,9 +17,9 @@ import gc
 from diskcache import Cache
 import time
 
-# Configure logging
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger(__name__)
+# Configure logging with a more specific logger name to avoid conflicts
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+logger = logging.getLogger("ClinicalOversightApp")
 
 # Persistent directory
 persistent_dir = "/data/hf_cache"
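
Note on the logging change: a named logger plus an explicit format makes this app's records easy to tell apart from other libraries' output. A minimal sketch of what the configured lines look like (the message text is illustrative); keep in mind that logging.basicConfig is a no-op if the root logger was already configured elsewhere, so the format only takes effect when this module runs first:

    import logging

    # Same configuration the diff introduces.
    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    logger = logging.getLogger("ClinicalOversightApp")

    # Emits e.g.: 2024-05-01 12:00:00,123 - ClinicalOversightApp - INFO - App started
    logger.info("App started")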
@@ -61,7 +61,7 @@ def extract_all_pages(file_path: str, progress_callback=None) -> str:
61
  with pdfplumber.open(file_path) as pdf:
62
  total_pages = len(pdf.pages)
63
  if total_pages == 0:
64
- logger.error("No pages found in PDF")
65
  return ""
66
 
67
  batch_size = 10
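
Including the path in this error is a small change that pays off once several PDFs run concurrently. A self-contained sketch of the empty-PDF guard, assuming the same pdfplumber API the function already uses:

    import pdfplumber

    def has_pages(file_path: str) -> bool:
        # pdfplumber exposes pages as a plain list, so an empty
        # document is simply len(pdf.pages) == 0.
        with pdfplumber.open(file_path) as pdf:
            return len(pdf.pages) > 0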
@@ -89,13 +89,13 @@ def extract_all_pages(file_path: str, progress_callback=None) -> str:
                 processed_pages += batch_size
                 if progress_callback:
                     progress_callback(min(processed_pages, total_pages), total_pages)
-                logger.info("Processed %d/%d pages", min(processed_pages, total_pages), total_pages)
+                logger.info("Processed %d/%d pages for %s", min(processed_pages, total_pages), total_pages, file_path)
 
             extracted_text = "\n\n".join(filter(None, text_chunks))
-            logger.info("Extracted %d pages, total length: %d chars", total_pages, len(extracted_text))
+            logger.info("Extracted %d pages from %s, total length: %d chars", total_pages, file_path, len(extracted_text))
             return extracted_text
     except Exception as e:
-        logger.error("PDF processing error: %s", e)
+        logger.error("PDF processing error for %s: %s", file_path, e, exc_info=True)
         return f"PDF processing error: {str(e)}"
 
 def convert_file_to_json(file_path: str, file_type: str, progress_callback=None) -> str:
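
The hunk only shows the tail of the extraction loop, so here is a hedged reconstruction of how the batching and progress callback plausibly fit together. The loop structure is an assumption; only the progress and logging lines are confirmed by the diff:

    import pdfplumber

    def extract_batched(file_path, progress_callback=None, batch_size=10):
        # Hypothetical reconstruction of the loop around the hunk above.
        text_chunks, processed_pages = [], 0
        with pdfplumber.open(file_path) as pdf:
            total_pages = len(pdf.pages)
            for start in range(0, total_pages, batch_size):
                for page in pdf.pages[start:start + batch_size]:
                    text_chunks.append(page.extract_text() or "")
                processed_pages += batch_size
                if progress_callback:
                    progress_callback(min(processed_pages, total_pages), total_pages)
        return "\n\n".join(filter(None, text_chunks))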
@@ -103,8 +103,10 @@ def convert_file_to_json(file_path: str, file_type: str, progress_callback=None)
         file_h = file_hash(file_path)
         cache_key = f"{file_h}_{file_type}"
         if cache_key in cache:
-            logger.info("Using cached extraction for %s", file_path)
+            logger.info("Cache hit for %s (key: %s)", file_path, cache_key)
             return cache[cache_key]
+        else:
+            logger.info("Cache miss for %s (key: %s), performing fresh extraction", file_path, cache_key)
 
         if file_type == "pdf":
             text = extract_all_pages(file_path, progress_callback)
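
With the new else branch, both cache outcomes are logged, which makes hit rates visible without a debugger. A minimal standalone sketch of the same diskcache lookup pattern (the cache directory is illustrative):

    from diskcache import Cache

    cache = Cache("/tmp/extraction_cache")  # illustrative location

    def lookup(file_h, file_type):
        key = f"{file_h}_{file_type}"
        if key in cache:        # diskcache supports dict-style membership tests
            return cache[key]   # hit: reuse the stored extraction
        return None             # miss: caller extracts, then stores cache[key] = result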
@@ -128,7 +130,7 @@ def convert_file_to_json(file_path: str, file_type: str, progress_callback=None)
         logger.info("Cached extraction for %s, size: %d bytes", file_path, len(result))
         return result
     except Exception as e:
-        logger.error("Error processing %s: %s", os.path.basename(file_path), e)
+        logger.error("Error processing %s: %s", os.path.basename(file_path), e, exc_info=True)
         return json.dumps({"error": f"Error processing {os.path.basename(file_path)}: {str(e)}"})
 
 def log_system_usage(tag=""):
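
exc_info=True is the recurring change in these handlers: it appends the formatted traceback to the log record rather than only the exception's string form. A small demonstration:

    import json
    import logging

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger("ClinicalOversightApp")

    try:
        json.loads("not valid json")
    except Exception as e:
        # Logs the message line followed by the full traceback.
        logger.error("Error processing %s: %s", "demo.json", e, exc_info=True)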
@@ -259,28 +261,42 @@ Patient Record Excerpt (Chunk {0} of {1}):
         yield history, None, ""
         logger.info("Starting analysis for message: %s", message[:100])
 
+        if not files:
+            logger.error("No files uploaded for analysis")
+            history.append({"role": "assistant", "content": "❌ Please upload a file to analyze."})
+            yield history, None, "### Summary of Clinical Oversights\nNo file uploaded for analysis."
+            return
+
         extracted = ""
         file_hash_value = ""
-        if files:
-            logger.info("Processing %d uploaded files", len(files))
-            def update_extraction_progress(current, total):
-                progress(current / total, desc=f"Extracting text... Page {current}/{total}")
-                return history, None, ""
-
-            with ThreadPoolExecutor(max_workers=6) as executor:
-                futures = [executor.submit(convert_file_to_json, f.name, f.name.split(".")[-1].lower(), update_extraction_progress) for f in files]
-                results = [sanitize_utf8(f.result()) for f in as_completed(futures)]
-                extracted = "\n".join(results)
-            file_hash_value = file_hash(files[0].name) if files else ""
-            logger.info("Extraction complete for %d files", len(files))
-            history.append({"role": "assistant", "content": "✅ Text extraction complete."})
-            yield history, None, ""
-        else:
-            logger.warning("No files uploaded for analysis")
+        logger.info("Processing %d uploaded files", len(files))
+        for f in files:
+            logger.info("Processing file: %s", f.name)
+
+        def update_extraction_progress(current, total):
+            progress(current / total, desc=f"Extracting text... Page {current}/{total}")
+            return history, None, ""
+
+        with ThreadPoolExecutor(max_workers=6) as executor:
+            futures = [executor.submit(convert_file_to_json, f.name, f.name.split(".")[-1].lower(), update_extraction_progress) for f in files]
+            results = [sanitize_utf8(f.result()) for f in as_completed(futures)]
+            extracted = "\n".join(results)
+        file_hash_value = file_hash(files[0].name) if files else ""
+        logger.info("Extraction complete for %d files", len(files))
+        history.append({"role": "assistant", "content": "✅ Text extraction complete."})
+        yield history, None, ""
 
         logger.info("Extracted text length: %d chars", len(extracted))
+        if len(extracted.strip()) == 0:
+            logger.error("Extracted text is empty")
+            history.append({"role": "assistant", "content": "❌ Extracted text is empty. Please ensure the file contains readable content."})
+            yield history, None, "### Summary of Clinical Oversights\nExtracted text is empty."
+            return
+
         chunk_size = 6000
         chunks = [extracted[i:i + chunk_size] for i in range(0, len(extracted), chunk_size)]
+        if not chunks:
+            chunks = [""]
         logger.info("Created %d chunks", len(chunks))
         combined_response = ""
         batch_size = 2
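
This hunk replaces the old if files: nesting with early-return guards, which flattens the happy path. One behaviour worth noting survives the rewrite: as_completed yields futures in completion order, so results can end up out of upload order. A hedged sketch of an order-preserving alternative, should that ever matter (the function name here is illustrative, not the app's):

    from concurrent.futures import ThreadPoolExecutor

    def extract_in_order(paths, convert):
        # executor.map preserves input order, unlike as_completed;
        # convert is assumed to take one path and return a string.
        with ThreadPoolExecutor(max_workers=6) as executor:
            return "\n".join(executor.map(convert, paths))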
@@ -337,7 +353,7 @@ Patient Record Excerpt (Chunk {0} of {1}):
         yield history, report_path if report_path and os.path.exists(report_path) else None, summary
 
     except Exception as e:
-        logger.error("Analysis error: %s", e)
+        logger.error("Analysis error: %s", e, exc_info=True)
         history.append({"role": "assistant", "content": f"❌ Error occurred: {str(e)}"})
         yield history, None, f"### Summary of Clinical Oversights\nError occurred during analysis: {str(e)}"
 
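The outer handler keeps the Gradio UI responsive on failure: the generator yields an error chat message and a summary string instead of dying silently. A minimal sketch of the same generator-level pattern, with an artificial failure standing in for the real analysis:

    import logging

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger("ClinicalOversightApp")

    def analyze(history):
        try:
            yield history, None, ""                  # normal progress update
            raise RuntimeError("model unavailable")  # stand-in for a real failure
        except Exception as e:
            logger.error("Analysis error: %s", e, exc_info=True)
            history.append({"role": "assistant", "content": f"❌ Error occurred: {str(e)}"})
            yield history, None, f"### Summary of Clinical Oversights\nError occurred during analysis: {str(e)}"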
 