RoAr777 committed on
Commit
f96bbd9
·
verified ·
1 Parent(s): 87805d3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -6
app.py CHANGED
@@ -9,12 +9,16 @@ import gradio as gr
9
  import os
10
  import pytesseract
11
  from PIL import Image
12
-
13
  model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
14
  index = faiss.read_index('IPC_index.faiss')
15
  index2 = faiss.read_index('CrpC_index.faiss')
16
-
17
-
 
 
 
 
18
  # Step 3: Retrieval with Citations using PDF filename
19
  def retrieve_info_with_citation(query, top_k=5):
20
  query_embedding = model.encode([query])
@@ -24,7 +28,10 @@ def retrieve_info_with_citation(query, top_k=5):
24
  for i in range(min(top_k, len(I[0]))):
25
  if D[0][i] < 1.0: # Relevance threshold
26
  chunk_index = I[0][i]
27
- citation = f"Source: IPC"
 
 
 
28
  results.append((match, citation))
29
  else:
30
  break
@@ -37,13 +44,16 @@ def retrieve_info_with_citation(query, top_k=5):
37
 
38
  def retrieve_info_with_citation2(query, top_k=5):
39
  query_embedding = model.encode([query])
40
- D, I = index.search(query_embedding, k=top_k)
41
 
42
  results = []
43
  for i in range(min(top_k, len(I[0]))):
44
  if D[0][i] < 1.0: # Relevance threshold
45
  chunk_index = I[0][i]
46
- citation = f"Source: CrPC"
 
 
 
47
  results.append((match, citation))
48
  else:
49
  break
@@ -63,6 +73,7 @@ def retrieve_info2(query):
63
  formatted_results = "\n\n".join([f"{i+1}. {match}\n{citation}" for i, (match, citation) in enumerate(results)])
64
  return formatted_results
65
 
 
66
  ipc_tool = Tool(
67
  name="IPC Information Retrieval",
68
  func=retrieve_info,
 
9
  import os
10
  import pytesseract
11
  from PIL import Image
12
+ import pickle
13
  model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
14
  index = faiss.read_index('IPC_index.faiss')
15
  index2 = faiss.read_index('CrpC_index.faiss')
16
+ flattened_data = pickle.load('IPC_F')
17
+ pdf_filenames = pickle.load('IPC_N')
18
+ chunk_indices = pickle.load('IPC_C')
19
+ flattened_data2 = pickle.load('CrPC_F')
20
+ pdf_filenames2 = pickle.load('CrPC_N')
21
+ chunk_indices2 = pickle.load('CrPC_C')
22
  # Step 3: Retrieval with Citations using PDF filename
23
  def retrieve_info_with_citation(query, top_k=5):
24
  query_embedding = model.encode([query])
 
28
  for i in range(min(top_k, len(I[0]))):
29
  if D[0][i] < 1.0: # Relevance threshold
30
  chunk_index = I[0][i]
31
+ pdf_filename = pdf_filenames[chunk_index]
32
+ chunk_number = chunk_indices[chunk_index] + 1
33
+ match = flattened_data[chunk_index]
34
+ citation = f"Source: {pdf_filename}, Chunk: {chunk_number}"
35
  results.append((match, citation))
36
  else:
37
  break
 
44
 
45
  def retrieve_info_with_citation2(query, top_k=5):
46
  query_embedding = model.encode([query])
47
+ D, I = index2.search(query_embedding, k=top_k)
48
 
49
  results = []
50
  for i in range(min(top_k, len(I[0]))):
51
  if D[0][i] < 1.0: # Relevance threshold
52
  chunk_index = I[0][i]
53
+ pdf_filename = pdf_filenames2[chunk_index]
54
+ chunk_number = chunk_indices2[chunk_index] + 1
55
+ match = flattened_data2[chunk_index]
56
+ citation = f"Source: {pdf_filename}, Chunk: {chunk_number}"
57
  results.append((match, citation))
58
  else:
59
  break
 
73
  formatted_results = "\n\n".join([f"{i+1}. {match}\n{citation}" for i, (match, citation) in enumerate(results)])
74
  return formatted_results
75
 
76
+
77
  ipc_tool = Tool(
78
  name="IPC Information Retrieval",
79
  func=retrieve_info,