import sys
import os
import json
import shutil
import re
import gc
import time
from datetime import datetime
from typing import List, Tuple, Dict, Union
import pandas as pd
import pdfplumber
import gradio as gr
import torch
import matplotlib.pyplot as plt
from fpdf import FPDF
import unicodedata

# === Configuration ===
persistent_dir = "/data/hf_cache"
model_cache_dir = os.path.join(persistent_dir, "txagent_models")
tool_cache_dir = os.path.join(persistent_dir, "tool_cache")
file_cache_dir = os.path.join(persistent_dir, "cache")
report_dir = os.path.join(persistent_dir, "reports")

for d in [model_cache_dir, tool_cache_dir, file_cache_dir, report_dir]:
    os.makedirs(d, exist_ok=True)

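# Point the Hugging Face caches at the persistent directory so model downloads survive restarts.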
os.environ["HF_HOME"] = model_cache_dir
os.environ["TRANSFORMERS_CACHE"] = model_cache_dir

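# Make the bundled ./src directory importable so TxAgent can be loaded from it.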
current_dir = os.path.dirname(os.path.abspath(__file__))
src_path = os.path.abspath(os.path.join(current_dir, "src"))
sys.path.insert(0, src_path)

from txagent.txagent import TxAgent

MAX_MODEL_TOKENS = 131072   # full context window of the model
MAX_NEW_TOKENS = 4096       # generation cap per call
MAX_CHUNK_TOKENS = 8192     # upper bound on tokens per document chunk
BATCH_SIZE = 1
PROMPT_OVERHEAD = 300       # headroom reserved for prompt scaffolding
SAFE_SLEEP = 0.5            # pause (seconds) between successive calls

def estimate_tokens(text: str) -> int:
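    # Rough heuristic: about 4 characters per token.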
    return len(text) // 4 + 1

def clean_response(text: str) -> str:
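    # Strip bracketed fragments and literal "None" strings, then collapse extra blank lines.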
    text = re.sub(r"\[.*?\]|\bNone\b", "", text, flags=re.DOTALL)
    text = re.sub(r"\n{3,}", "\n\n", text)
    return text.strip()

def remove_duplicate_paragraphs(text: str) -> str:
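    # Keep only the first occurrence of each paragraph, preserving order.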
    paragraphs = text.strip().split("\n\n")
    seen = set()
    unique_paragraphs = []
    for p in paragraphs:
        clean_p = p.strip()
        if clean_p and clean_p not in seen:
            unique_paragraphs.append(clean_p)
            seen.add(clean_p)
    return "\n\n".join(unique_paragraphs)

# === FastAPI for mobile API endpoint ===
from fastapi import FastAPI, UploadFile, File
from fastapi.responses import JSONResponse
import uvicorn

app = FastAPI()

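# NOTE: init_agent(), process_report(), and create_ui() are called below but are
# not defined in this file; they are assumed to be provided elsewhere in the app.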
@app.post("/analyze")
async def analyze_file_api(file: UploadFile = File(...)):
    agent = init_agent()
    temp_file_path = os.path.join(file_cache_dir, file.filename)
    with open(temp_file_path, "wb") as f:
        f.write(await file.read())
    messages = []
    # Open the cached upload in a context manager so the handle is closed after use.
    with open(temp_file_path, "rb") as report_file:
        messages, pdf_path = process_report(agent, report_file, messages)
    if pdf_path:
        return JSONResponse(content={"summary": messages[-2]['content'], "pdf": pdf_path})
    return JSONResponse(content={"error": "Processing failed."}, status_code=400)
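# Example request against the endpoint above (file name is illustrative):
#   curl -X POST -F "file=@report.pdf" http://localhost:8000/analyze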

# === Original Gradio UI launch preserved ===
if __name__ == "__main__":
    agent = init_agent()
    ui = create_ui(agent)
    import threading
    # The Gradio UI on port 7860 runs in a daemon thread; the FastAPI endpoint is
    # served by uvicorn on port 8000 in the main thread. daemon=True lets the
    # process exit cleanly when uvicorn shuts down.
    threading.Thread(
        target=lambda: ui.launch(server_name="0.0.0.0", server_port=7860,
                                 allowed_paths=["/data/hf_cache/reports"], share=False),
        daemon=True).start()
    uvicorn.run(app, host="0.0.0.0", port=8000)