Shriharsh committed on
Commit
1104992
·
verified ·
1 Parent(s): c803a6f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -30
app.py CHANGED
@@ -4,7 +4,6 @@ import gradio as gr
4
  from transformers import pipeline
5
  from sentence_transformers import SentenceTransformer, util
6
  import PyPDF2
7
- import re
8
 
9
  # Set up logging with immediate writing
10
  logging.basicConfig(
@@ -19,35 +18,6 @@ logger = logging.getLogger()
19
  qa_model = pipeline("question-answering", model="distilbert-base-uncased-distilled-squad")
20
  embedder = SentenceTransformer('all-MiniLM-L6-v2')
21
 
22
- def preprocess_qa_format(text):
23
- """
24
- Detects Q/A pairs in different formats and returns only the answer text.
25
-
26
- Supported formats:
27
- 1. "Question X: ..." followed by "Answer: ..."
28
- 2. "Q X: ..." followed by "A: ..."
29
- 3. "Q X: ..." followed by an inferred answer line.
30
- """
31
- # Pattern for explicit "Question ..." and "Answer ..." pairs
32
- pattern1 = re.compile(r"(?i)question\s*\d*\s*:\s*(.+?)\n\s*answer[:]*\s*(.+?)(?:\n|$)")
33
- # Pattern for shorthand "Q ..." and "A: ..." pairs
34
- pattern2 = re.compile(r"(?i)Q\s*\d*\s*:\s*(.+?)\n\s*A[:]*\s*(.+?)(?:\n|$)")
35
- # Pattern for "Q ..." followed by an inferred answer (starting with a capital letter and ending with a period)
36
- pattern3 = re.compile(r"(?i)Q\s*\d*\s*:\s*(.+?)\n\s*([A-Z][^.]*\..+?)(?:\n|$)")
37
-
38
- def replacer(match):
39
- # We ignore the question text entirely and keep only the answer
40
- answer_text = match.group(2).strip()
41
- return f"{answer_text}\n"
42
-
43
- # Apply the transformations
44
- text = pattern1.sub(replacer, text)
45
- text = pattern2.sub(replacer, text)
46
- text = pattern3.sub(replacer, text)
47
- return text
48
-
49
-
50
-
51
  # Helper function to extract text from PDF
52
  def extract_text_from_pdf(file_path):
53
  text = ""
 
4
  from transformers import pipeline
5
  from sentence_transformers import SentenceTransformer, util
6
  import PyPDF2
 
7
 
8
  # Set up logging with immediate writing
9
  logging.basicConfig(
 
18
  qa_model = pipeline("question-answering", model="distilbert-base-uncased-distilled-squad")
19
  embedder = SentenceTransformer('all-MiniLM-L6-v2')
20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  # Helper function to extract text from PDF
22
  def extract_text_from_pdf(file_path):
23
  text = ""