Update app.py
Browse files
app.py
CHANGED
@@ -25,15 +25,18 @@ model_cache_dir = os.path.join(base_dir, "txagent_models")
|
|
25 |
tool_cache_dir = os.path.join(base_dir, "tool_cache")
|
26 |
file_cache_dir = os.path.join(base_dir, "cache")
|
27 |
report_dir = os.path.join(base_dir, "reports")
|
|
|
28 |
|
29 |
os.makedirs(model_cache_dir, exist_ok=True)
|
30 |
os.makedirs(tool_cache_dir, exist_ok=True)
|
31 |
os.makedirs(file_cache_dir, exist_ok=True)
|
32 |
os.makedirs(report_dir, exist_ok=True)
|
|
|
33 |
|
34 |
os.environ.update({
|
35 |
"TRANSFORMERS_CACHE": model_cache_dir,
|
36 |
"HF_HOME": model_cache_dir,
|
|
|
37 |
"TOKENIZERS_PARALLELISM": "false",
|
38 |
"CUDA_LAUNCH_BLOCKING": "1"
|
39 |
})
|
@@ -60,7 +63,7 @@ def extract_priority_pages(file_path: str, max_pages: int = 20) -> str:
|
|
60 |
text_chunks.append(f"=== Page {i+1} ===\n{(page.extract_text() or '').strip()}")
|
61 |
for i, page in enumerate(pdf.pages[3:max_pages], start=4):
|
62 |
page_text = page.extract_text() or ""
|
63 |
-
if any(re.search(rf'\b{kw}\b', page_text.lower()) for kw in MEDICAL_KEYWORDS):
|
64 |
text_chunks.append(f"=== Page {i} ===\n{page_text.strip()}")
|
65 |
return "\n\n".join(text_chunks)
|
66 |
except Exception as e:
|
|
|
25 |
tool_cache_dir = os.path.join(base_dir, "tool_cache")
|
26 |
file_cache_dir = os.path.join(base_dir, "cache")
|
27 |
report_dir = os.path.join(base_dir, "reports")
|
28 |
+
vllm_cache_dir = os.path.join(base_dir, "vllm_cache")
|
29 |
|
30 |
os.makedirs(model_cache_dir, exist_ok=True)
|
31 |
os.makedirs(tool_cache_dir, exist_ok=True)
|
32 |
os.makedirs(file_cache_dir, exist_ok=True)
|
33 |
os.makedirs(report_dir, exist_ok=True)
|
34 |
+
os.makedirs(vllm_cache_dir, exist_ok=True)
|
35 |
|
36 |
os.environ.update({
|
37 |
"TRANSFORMERS_CACHE": model_cache_dir,
|
38 |
"HF_HOME": model_cache_dir,
|
39 |
+
"VLLM_CACHE_DIR": vllm_cache_dir,
|
40 |
"TOKENIZERS_PARALLELISM": "false",
|
41 |
"CUDA_LAUNCH_BLOCKING": "1"
|
42 |
})
|
|
|
63 |
text_chunks.append(f"=== Page {i+1} ===\n{(page.extract_text() or '').strip()}")
|
64 |
for i, page in enumerate(pdf.pages[3:max_pages], start=4):
|
65 |
page_text = page.extract_text() or ""
|
66 |
+
if any(re.search(rf'\b{kw}\b', page_text.lower()) for kw in MEDICAL_KEYWORDS):
|
67 |
text_chunks.append(f"=== Page {i} ===\n{page_text.strip()}")
|
68 |
return "\n\n".join(text_chunks)
|
69 |
except Exception as e:
|