tejash300 committed · Commit c6aa39f · verified · 1 Parent(s): 14e121b

Update app.py

Files changed (1): app.py (+9 -9)
app.py CHANGED
@@ -1,5 +1,6 @@
 import os
 os.environ["TRANSFORMERS_NO_FAST"] = "1"  # Force use of slow tokenizers
+os.environ["CUDA_LAUNCH_BLOCKING"] = "1"  # Enable synchronous CUDA errors for debugging
 
 import io
 import torch
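
Note: CUDA_LAUNCH_BLOCKING=1 forces each CUDA kernel launch to run synchronously, so a device-side assert (such as the out-of-bound token index this commit guards against below) is reported at the call that triggered it instead of at some later, unrelated op; it also slows GPU inference, so it is best treated as a debugging switch. The variable is only honored if it is set before CUDA initializes, which is why it sits with the other os.environ line above the torch import. A minimal sketch of the required ordering:

import os

# Must be set before the first CUDA call; changing it later has no effect.
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"  # synchronous launches, for debugging only

import torch  # safe: CUDA initializes lazily, after the environment is configured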
@@ -13,7 +14,7 @@ import numpy as np
 import json
 import tempfile
 from fastapi import FastAPI, UploadFile, File, HTTPException, Form, BackgroundTasks
-from fastapi.responses import FileResponse, JSONResponse, HTMLResponse  # Added HTMLResponse
+from fastapi.responses import FileResponse, JSONResponse, HTMLResponse
 from fastapi.middleware.cors import CORSMiddleware
 from transformers import pipeline, AutoModelForQuestionAnswering, AutoTokenizer
 from sentence_transformers import SentenceTransformer
@@ -34,12 +35,12 @@ from gensim import corpora, models
 # Global cache for analysis results based on file hash
 analysis_cache = {}
 
-# Ensure compatibility with Google Colab
+# Ensure compatibility with Google Colab (if applicable)
 try:
     from google.colab import drive
     drive.mount('/content/drive')
 except Exception:
-    pass  # Not running in Colab
+    pass
 
 # Ensure required directories exist
 os.makedirs("static", exist_ok=True)
@@ -64,16 +65,13 @@ app.add_middleware(
 document_storage = {}
 chat_history = []
 
-# Function to store document context by task ID
 def store_document_context(task_id, text):
     document_storage[task_id] = text
     return True
 
-# Function to load document context by task ID
 def load_document_context(task_id):
     return document_storage.get(task_id, "")
 
-# Utility to compute MD5 hash from file content
 def compute_md5(content: bytes) -> str:
     return hashlib.md5(content).hexdigest()
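Note: compute_md5 presumably feeds the analysis_cache declared earlier ("Global cache for analysis results based on file hash"). A sketch of that lookup pattern; the wrapper name and the decode step are assumptions, not code from this diff:

def analyze_with_cache(content: bytes):
    # Hypothetical wrapper: skip re-analysis when the same file bytes are uploaded again.
    file_hash = compute_md5(content)
    if file_hash not in analysis_cache:
        text = content.decode("utf-8", errors="ignore")  # assumed decoding step
        analysis_cache[file_hash] = analyze_contract_clauses(text)
    return analysis_cache[file_hash]

MD5 is fine as a cache key, but it is not collision-resistant; hashlib.sha256 would be a drop-in replacement if that ever matters.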
 
@@ -196,14 +194,13 @@ try:
     spacy.cli.download("en_core_web_sm")
     nlp = spacy.load("en_core_web_sm")
 print("✅ Loading NLP models...")
-# Use Facebook's bart-large-cnn for summarization
+from transformers import PegasusTokenizer  # Not used now since we're using bart-large-cnn
 summarizer = pipeline(
     "summarization",
     model="facebook/bart-large-cnn",
     tokenizer="facebook/bart-large-cnn",
     device=0 if torch.cuda.is_available() else -1
 )
-# Optionally convert summarizer model to FP16 for faster inference on GPU
 if device == "cuda":
     summarizer.model.half()
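Note: the newly added PegasusTokenizer import is unused by its own admission and could simply be dropped. summarizer.model.half() permanently casts the BART weights to FP16, which is generally safe for GPU inference. An alternative that keeps the weights in FP32 and only runs the forward pass in half precision is autocast; a sketch, not what this commit does:

with torch.autocast(device_type="cuda", dtype=torch.float16):
    # Pipeline call as configured above; the generation arguments are illustrative.
    summary = summarizer(text, max_length=150, min_length=40, do_sample=False)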
 
@@ -344,7 +341,10 @@ def analyze_contract_clauses(text):
     # Create chunks of the text
     chunks = [text[i:i+max_length] for i in range(0, len(text), step) if i+step < len(text)]
     for chunk in chunks:
-        inputs = cuad_tokenizer(chunk, return_tensors="pt", truncation=True, max_length=512).to(device)
+        # Tokenize and move to GPU, then add safety clamp to avoid out-of-bound token indices
+        tokenized_inputs = cuad_tokenizer(chunk, return_tensors="pt", truncation=True, max_length=512)
+        inputs = {k: v.to(device) for k, v in tokenized_inputs.items()}
+        inputs["input_ids"] = torch.clamp(inputs["input_ids"], max=cuad_model.config.vocab_size - 1)
         with torch.no_grad():
             outputs = cuad_model(**inputs)
         predictions = torch.sigmoid(outputs.start_logits).cpu().numpy()[0]
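
Note: the clamp stops the CUDA device-side assert, but it masks the symptom rather than the cause: input ids at or above vocab_size usually mean the tokenizer and the checkpoint disagree (for example, tokens were added to the tokenizer without resizing the model's embeddings), and clamping silently rewrites those tokens to the last vocabulary entry. A cheaper startup check, under that assumed root cause:

# Assumption: the tokenizer grew past the checkpoint's embedding table.
if len(cuad_tokenizer) > cuad_model.config.vocab_size:
    cuad_model.resize_token_embeddings(len(cuad_tokenizer))

Separately, the unchanged chunking line above (the "if i+step < len(text)" filter) drops the final partial chunk, so the tail of a long contract is never scanned; worth a follow-up commit.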
 