import sys
import os
import json
import shutil
import re
import gc
import time
from datetime import datetime
from typing import List, Tuple, Dict, Union
import pandas as pd
import pdfplumber
import gradio as gr
import torch
import matplotlib.pyplot as plt
from fpdf import FPDF
import unicodedata
# === Configuration ===
persistent_dir = "/data/hf_cache"
model_cache_dir = os.path.join(persistent_dir, "txagent_models")
tool_cache_dir = os.path.join(persistent_dir, "tool_cache")
file_cache_dir = os.path.join(persistent_dir, "cache")
report_dir = os.path.join(persistent_dir, "reports")
for d in [model_cache_dir, tool_cache_dir, file_cache_dir, report_dir]:
    os.makedirs(d, exist_ok=True)
os.environ["HF_HOME"] = model_cache_dir
os.environ["TRANSFORMERS_CACHE"] = model_cache_dir
# Make the bundled src/ package importable before importing TxAgent.
current_dir = os.path.dirname(os.path.abspath(__file__))
src_path = os.path.abspath(os.path.join(current_dir, "src"))
sys.path.insert(0, src_path)
from txagent.txagent import TxAgent
MAX_MODEL_TOKENS = 131072   # total context window
MAX_NEW_TOKENS = 4096       # generation budget per call
MAX_CHUNK_TOKENS = 8192     # input budget per chunk
BATCH_SIZE = 1
PROMPT_OVERHEAD = 300       # tokens reserved for the prompt wrapper
SAFE_SLEEP = 0.5            # pause (seconds) between successive calls
def estimate_tokens(text: str) -> int:
    # Rough heuristic: ~4 characters per token.
    return len(text) // 4 + 1
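# Sketch (assumption, not called anywhere in this file): how the estimator above
# could bound chunk size for the downstream report pipeline. The helper name and
# splitting strategy are illustrative only.
def _example_chunk_text(text: str) -> List[str]:
    # Invert the 4-chars-per-token heuristic, leaving room for the prompt overhead.
    budget_chars = (MAX_CHUNK_TOKENS - PROMPT_OVERHEAD) * 4
    return [text[i:i + budget_chars] for i in range(0, len(text), budget_chars)]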
def clean_response(text: str) -> str:
    # Drop bracketed tool/citation markers and literal "None" tokens, then collapse blank lines.
    text = re.sub(r"\[.*?\]|\bNone\b", "", text, flags=re.DOTALL)
    text = re.sub(r"\n{3,}", "\n\n", text)
    return text.strip()
def remove_duplicate_paragraphs(text: str) -> str:
    # Keep the first occurrence of each paragraph, preserving order.
    paragraphs = text.strip().split("\n\n")
    seen = set()
    unique_paragraphs = []
    for p in paragraphs:
        clean_p = p.strip()
        if clean_p and clean_p not in seen:
            unique_paragraphs.append(clean_p)
            seen.add(clean_p)
    return "\n\n".join(unique_paragraphs)
# === FastAPI for mobile API endpoint ===
from fastapi import FastAPI, UploadFile, File
from fastapi.responses import JSONResponse
import uvicorn
app = FastAPI()
@app.post("/analyze")
async def analyze_file_api(file: UploadFile = File(...)):
agent = init_agent()
temp_file_path = os.path.join(file_cache_dir, file.filename)
with open(temp_file_path, "wb") as f:
f.write(await file.read())
messages = []
messages, pdf_path = process_report(agent, open(temp_file_path, "rb"), messages)
if pdf_path:
return JSONResponse(content={"summary": messages[-2]['content'], "pdf": pdf_path})
return JSONResponse(content={"error": "Processing failed."}, status_code=400)
# === Original Gradio UI launch preserved ===
if __name__ == "__main__":
    agent = init_agent()
    ui = create_ui(agent)
    import threading
    # Gradio UI runs in a background thread; FastAPI serves on the main thread.
    threading.Thread(
        target=lambda: ui.launch(server_name="0.0.0.0", server_port=7860,
                                 allowed_paths=["/data/hf_cache/reports"], share=False)
    ).start()
    uvicorn.run(app, host="0.0.0.0", port=8000)