Update app.py
app.py CHANGED
@@ -1,5 +1,6 @@
 import os
 os.environ["TRANSFORMERS_NO_FAST"] = "1"  # Force use of slow tokenizers
+os.environ["CUDA_LAUNCH_BLOCKING"] = "1"  # Enable synchronous CUDA errors for debugging
 
 import io
 import torch
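CUDA_LAUNCH_BLOCKING only changes how CUDA errors are reported, and it has to be set before CUDA is initialized, which is why it sits next to TRANSFORMERS_NO_FAST at the very top of the file. A minimal sketch (not part of the commit) of the failure mode it is meant to surface, assuming a CUDA device and a made-up 10-row embedding table:

    import os
    os.environ["CUDA_LAUNCH_BLOCKING"] = "1"  # must be set before CUDA is initialized
    import torch

    emb = torch.nn.Embedding(10, 4).cuda()
    bad_ids = torch.tensor([[3, 42]]).cuda()  # 42 is out of range for a 10-row table
    out = emb(bad_ids)  # with blocking launches, the device-side assert is raised here

Without the variable, the same assert tends to surface at a later, unrelated synchronization point, which makes the out-of-range token ids addressed further down much harder to trace.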
@@ -13,7 +14,7 @@ import numpy as np
 import json
 import tempfile
 from fastapi import FastAPI, UploadFile, File, HTTPException, Form, BackgroundTasks
-from fastapi.responses import FileResponse, JSONResponse, HTMLResponse
+from fastapi.responses import FileResponse, JSONResponse, HTMLResponse
 from fastapi.middleware.cors import CORSMiddleware
 from transformers import pipeline, AutoModelForQuestionAnswering, AutoTokenizer
 from sentence_transformers import SentenceTransformer
@@ -34,12 +35,12 @@ from gensim import corpora, models
 # Global cache for analysis results based on file hash
 analysis_cache = {}
 
-# Ensure compatibility with Google Colab
+# Ensure compatibility with Google Colab (if applicable)
 try:
     from google.colab import drive
     drive.mount('/content/drive')
 except Exception:
-    pass
+    pass
 
 # Ensure required directories exist
 os.makedirs("static", exist_ok=True)
@@ -64,16 +65,13 @@ app.add_middleware(
 document_storage = {}
 chat_history = []
 
-# Function to store document context by task ID
 def store_document_context(task_id, text):
     document_storage[task_id] = text
     return True
 
-# Function to load document context by task ID
 def load_document_context(task_id):
     return document_storage.get(task_id, "")
 
-# Utility to compute MD5 hash from file content
 def compute_md5(content: bytes) -> str:
     return hashlib.md5(content).hexdigest()
 
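These helpers pair with analysis_cache above: compute_md5 keys the cache so re-uploads of identical bytes skip re-analysis, while store_document_context keeps the extracted text for later lookup by task ID. A sketch of the intended flow, assuming an upload handler along these lines exists elsewhere in app.py (the handler name and the placeholder result are illustrative):

    def handle_upload(task_id: str, content: bytes):
        file_hash = compute_md5(content)
        if file_hash in analysis_cache:           # same bytes seen before: reuse the result
            return analysis_cache[file_hash]
        text = content.decode("utf-8", errors="ignore")   # real extraction is richer than this
        store_document_context(task_id, text)     # retained for chat / question answering
        result = {"hash": file_hash, "chars": len(text)}  # stand-in for the real analysis
        analysis_cache[file_hash] = result
        return result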
@@ -196,14 +194,13 @@ try:
     spacy.cli.download("en_core_web_sm")
     nlp = spacy.load("en_core_web_sm")
 print("✅ Loading NLP models...")
-#
+from transformers import PegasusTokenizer  # Not used now since we're using bart-large-cnn
 summarizer = pipeline(
     "summarization",
     model="facebook/bart-large-cnn",
     tokenizer="facebook/bart-large-cnn",
     device=0 if torch.cuda.is_available() else -1
 )
-# Optionally convert summarizer model to FP16 for faster inference on GPU
 if device == "cuda":
     summarizer.model.half()
 
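A short usage sketch for the pipeline configured above; the input string is invented and the length limits are assumptions, not values from this commit:

    sample = ("This Agreement may be terminated by either party upon thirty (30) days "
              "written notice if the other party materially breaches its obligations.")
    summary = summarizer(sample, max_length=60, min_length=10, do_sample=False)
    print(summary[0]["summary_text"])

Because summarizer.model.half() only runs when device == "cuda", the FP16 conversion never applies to a CPU-only container, where half precision is generally not beneficial.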
@@ -344,7 +341,10 @@ def analyze_contract_clauses(text):
     # Create chunks of the text
     chunks = [text[i:i+max_length] for i in range(0, len(text), step) if i+step < len(text)]
     for chunk in chunks:
-
+        # Tokenize and move to GPU, then add safety clamp to avoid out-of-bound token indices
+        tokenized_inputs = cuad_tokenizer(chunk, return_tensors="pt", truncation=True, max_length=512)
+        inputs = {k: v.to(device) for k, v in tokenized_inputs.items()}
+        inputs["input_ids"] = torch.clamp(inputs["input_ids"], max=cuad_model.config.vocab_size - 1)
         with torch.no_grad():
             outputs = cuad_model(**inputs)
         predictions = torch.sigmoid(outputs.start_logits).cpu().numpy()[0]
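The added clamp is a guard rather than a root-cause fix: any id at or above cuad_model.config.vocab_size is remapped to the last valid row of the embedding table instead of triggering the device-side assert that CUDA_LAUNCH_BLOCKING was enabled to expose. A standalone sketch of the same guard with made-up numbers:

    import torch

    vocab_size = 512                               # stands in for cuad_model.config.vocab_size
    input_ids = torch.tensor([[5, 17, 600, 511]])  # 600 would index past the embedding table
    safe_ids = torch.clamp(input_ids, max=vocab_size - 1)
    print(safe_ids)                                # tensor([[  5,  17, 511, 511]])

If out-of-range ids show up regularly, the usual cause is a tokenizer whose vocabulary is larger than the checkpoint's embedding matrix; the clamp keeps the endpoint alive while that mismatch is investigated.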
|