Update app.py
Browse files
app.py
CHANGED
@@ -9,12 +9,16 @@ import gradio as gr
|
|
9 |
import os
|
10 |
import pytesseract
|
11 |
from PIL import Image
|
12 |
-
|
13 |
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
|
14 |
index = faiss.read_index('IPC_index.faiss')
|
15 |
index2 = faiss.read_index('CrpC_index.faiss')
|
16 |
-
|
17 |
-
|
|
|
|
|
|
|
|
|
18 |
# Step 3: Retrieval with Citations using PDF filename
|
19 |
def retrieve_info_with_citation(query, top_k=5):
|
20 |
query_embedding = model.encode([query])
|
@@ -24,7 +28,10 @@ def retrieve_info_with_citation(query, top_k=5):
|
|
24 |
for i in range(min(top_k, len(I[0]))):
|
25 |
if D[0][i] < 1.0: # Relevance threshold
|
26 |
chunk_index = I[0][i]
|
27 |
-
|
|
|
|
|
|
|
28 |
results.append((match, citation))
|
29 |
else:
|
30 |
break
|
@@ -37,13 +44,16 @@ def retrieve_info_with_citation(query, top_k=5):
|
|
37 |
|
38 |
def retrieve_info_with_citation2(query, top_k=5):
|
39 |
query_embedding = model.encode([query])
|
40 |
-
D, I =
|
41 |
|
42 |
results = []
|
43 |
for i in range(min(top_k, len(I[0]))):
|
44 |
if D[0][i] < 1.0: # Relevance threshold
|
45 |
chunk_index = I[0][i]
|
46 |
-
|
|
|
|
|
|
|
47 |
results.append((match, citation))
|
48 |
else:
|
49 |
break
|
@@ -63,6 +73,7 @@ def retrieve_info2(query):
|
|
63 |
formatted_results = "\n\n".join([f"{i+1}. {match}\n{citation}" for i, (match, citation) in enumerate(results)])
|
64 |
return formatted_results
|
65 |
|
|
|
66 |
ipc_tool = Tool(
|
67 |
name="IPC Information Retrieval",
|
68 |
func=retrieve_info,
|
|
|
9 |
import os
|
10 |
import pytesseract
|
11 |
from PIL import Image
|
12 |
+
import pickle
|
13 |
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
|
14 |
index = faiss.read_index('IPC_index.faiss')
|
15 |
index2 = faiss.read_index('CrpC_index.faiss')
|
16 |
+
# Load the IPC text chunks. pickle.load() requires an open binary file
# object; passing the path string raises TypeError at import time.
# NOTE: pickle can execute arbitrary code -- only load trusted files.
with open('IPC_F', 'rb') as _fh:
    flattened_data = pickle.load(_fh)
|
17 |
+
# Load the per-chunk source PDF filenames for the IPC data. pickle.load()
# needs a binary file object, not a path string.
with open('IPC_N', 'rb') as _fh:
    pdf_filenames = pickle.load(_fh)
|
18 |
+
# Load the per-chunk position numbers for the IPC data. pickle.load()
# needs a binary file object, not a path string.
with open('IPC_C', 'rb') as _fh:
    chunk_indices = pickle.load(_fh)
|
19 |
+
# Load the CrPC text chunks. pickle.load() needs a binary file object,
# not a path string.
with open('CrPC_F', 'rb') as _fh:
    flattened_data2 = pickle.load(_fh)
|
20 |
+
# Load the per-chunk source PDF filenames for the CrPC data. pickle.load()
# needs a binary file object, not a path string.
with open('CrPC_N', 'rb') as _fh:
    pdf_filenames2 = pickle.load(_fh)
|
21 |
+
# Load the per-chunk position numbers for the CrPC data. pickle.load()
# needs a binary file object, not a path string.
with open('CrPC_C', 'rb') as _fh:
    chunk_indices2 = pickle.load(_fh)
|
22 |
# Step 3: Retrieval with Citations using PDF filename
|
23 |
def retrieve_info_with_citation(query, top_k=5):
|
24 |
query_embedding = model.encode([query])
|
|
|
28 |
for i in range(min(top_k, len(I[0]))):
|
29 |
if D[0][i] < 1.0: # Relevance threshold
|
30 |
chunk_index = I[0][i]
|
31 |
+
pdf_filename = pdf_filenames[chunk_index]
|
32 |
+
chunk_number = chunk_indices[chunk_index] + 1
|
33 |
+
match = flattened_data[chunk_index]
|
34 |
+
citation = f"Source: {pdf_filename}, Chunk: {chunk_number}"
|
35 |
results.append((match, citation))
|
36 |
else:
|
37 |
break
|
|
|
44 |
|
45 |
def retrieve_info_with_citation2(query, top_k=5):
|
46 |
query_embedding = model.encode([query])
|
47 |
+
D, I = index2.search(query_embedding, k=top_k)
|
48 |
|
49 |
results = []
|
50 |
for i in range(min(top_k, len(I[0]))):
|
51 |
if D[0][i] < 1.0: # Relevance threshold
|
52 |
chunk_index = I[0][i]
|
53 |
+
pdf_filename = pdf_filenames2[chunk_index]
|
54 |
+
chunk_number = chunk_indices2[chunk_index] + 1
|
55 |
+
match = flattened_data2[chunk_index]
|
56 |
+
citation = f"Source: {pdf_filename}, Chunk: {chunk_number}"
|
57 |
results.append((match, citation))
|
58 |
else:
|
59 |
break
|
|
|
73 |
formatted_results = "\n\n".join([f"{i+1}. {match}\n{citation}" for i, (match, citation) in enumerate(results)])
|
74 |
return formatted_results
|
75 |
|
76 |
+
|
77 |
ipc_tool = Tool(
|
78 |
name="IPC Information Retrieval",
|
79 |
func=retrieve_info,
|