import sys
import os
import json
import shutil
import re
import gc
import time
import threading
import unicodedata
from datetime import datetime
from typing import List, Tuple, Dict, Union

import pandas as pd
import pdfplumber
import gradio as gr
import torch
import matplotlib.pyplot as plt
from fpdf import FPDF
from fastapi import FastAPI, UploadFile, File
from fastapi.responses import JSONResponse
import uvicorn

# === Configuration ===
persistent_dir = "/data/hf_cache"
model_cache_dir = os.path.join(persistent_dir, "txagent_models")
tool_cache_dir = os.path.join(persistent_dir, "tool_cache")
file_cache_dir = os.path.join(persistent_dir, "cache")
report_dir = os.path.join(persistent_dir, "reports")

for d in [model_cache_dir, tool_cache_dir, file_cache_dir, report_dir]:
    os.makedirs(d, exist_ok=True)

# NOTE(review): these variables are set after gradio/torch have already been
# imported; any library that reads them at import time will not see them.
# Confirm the caches actually land in model_cache_dir.
os.environ["HF_HOME"] = model_cache_dir
os.environ["TRANSFORMERS_CACHE"] = model_cache_dir

# Make the bundled ``src`` directory importable before loading TxAgent.
current_dir = os.path.dirname(os.path.abspath(__file__))
src_path = os.path.abspath(os.path.join(current_dir, "src"))
sys.path.insert(0, src_path)

from txagent.txagent import TxAgent

MAX_MODEL_TOKENS = 131072
MAX_NEW_TOKENS = 4096
MAX_CHUNK_TOKENS = 8192
BATCH_SIZE = 1
PROMPT_OVERHEAD = 300
SAFE_SLEEP = 0.5


def estimate_tokens(text: str) -> int:
    """Return a rough token estimate: one token per four characters, plus one."""
    return len(text) // 4 + 1


def clean_response(text: str) -> str:
    """Strip bracketed spans and standalone ``None`` words from *text*.

    Bracketed spans are matched non-greedily and may cross line breaks.
    Runs of three or more newlines are then collapsed to a single blank
    line, and surrounding whitespace is trimmed.
    """
    text = re.sub(r"\[.*?\]|\bNone\b", "", text, flags=re.DOTALL)
    text = re.sub(r"\n{3,}", "\n\n", text)
    return text.strip()


def remove_duplicate_paragraphs(text: str) -> str:
    """Return *text* with repeated paragraphs removed.

    Paragraphs are separated by blank lines. Each paragraph is stripped,
    empty ones are dropped, and only the first occurrence of each distinct
    paragraph is kept, in original order.
    """
    paragraphs = text.strip().split("\n\n")
    seen = set()
    unique_paragraphs = []
    for p in paragraphs:
        clean_p = p.strip()
        if clean_p and clean_p not in seen:
            unique_paragraphs.append(clean_p)
            seen.add(clean_p)
    return "\n\n".join(unique_paragraphs)


# === FastAPI for mobile API endpoint ===
app = FastAPI()


@app.post("/analyze")
async def analyze_file_api(file: UploadFile = File(...)):
    """Store an uploaded file in the cache directory and run ``process_report`` on it.

    Args:
        file: The uploaded file from the multipart request.

    Returns:
        A ``JSONResponse`` with ``summary`` (the content of the second-to-last
        message returned by ``process_report``) and ``pdf`` (the report path)
        on success, or a 400 response with an ``error`` message otherwise.
    """
    # The filename is client-controlled: keep only its final path component so
    # values such as "../../etc/passwd" cannot escape file_cache_dir.
    safe_name = os.path.basename(file.filename or "")
    if safe_name in ("", ".", ".."):
        return JSONResponse(content={"error": "Invalid filename."}, status_code=400)

    # NOTE(review): a fresh agent is created for every request; if init_agent
    # is expensive, consider sharing one instance - confirm against init_agent.
    agent = init_agent()

    temp_file_path = os.path.join(file_cache_dir, safe_name)
    with open(temp_file_path, "wb") as f:
        f.write(await file.read())

    # Open the stored copy in a context manager so the handle is always
    # released, even if process_report raises.
    with open(temp_file_path, "rb") as uploaded:
        messages, pdf_path = process_report(agent, uploaded, [])

    # Guard the [-2] lookup: with fewer than two messages there is no summary
    # to return, so report a processing failure instead of raising IndexError.
    if pdf_path and len(messages) >= 2:
        return JSONResponse(content={"summary": messages[-2]['content'], "pdf": pdf_path})
    return JSONResponse(content={"error": "Processing failed."}, status_code=400)


# === Original Gradio UI launch preserved ===
if __name__ == "__main__":
    agent = init_agent()
    ui = create_ui(agent)

    # Run the Gradio UI in a background thread so uvicorn can serve the API
    # from the main thread.
    threading.Thread(
        target=ui.launch,
        kwargs={
            "server_name": "0.0.0.0",
            "server_port": 7860,
            "allowed_paths": [report_dir],
            "share": False,
        },
    ).start()

    uvicorn.run(app, host="0.0.0.0", port=8000)