Jyothish9988 committed
Commit 1282c03 · verified · 1 Parent(s): eb280b2

Update app.py

Files changed (1):
  1. app.py  +36 -20
app.py CHANGED
@@ -3,12 +3,23 @@ import gradio as gr
 from langchain.vectorstores import FAISS
 from langchain.embeddings import HuggingFaceEmbeddings
 from langchain.schema import Document
+import os
+import pickle
 
-# Initialize once when the app starts
+EMBEDDING_MODEL = "sentence-transformers/multi-qa-mpnet-base-dot-v1"
+DATASET_PATH = "qa_dataset.csv"
+FAISS_INDEX_PATH = "faiss_index"
+
+# Initialize system (Load dataset and FAISS index)
 def initialize_system():
-    # Load dataset
-    data = pd.read_csv("qa_dataset.csv")
-
+    if os.path.exists(FAISS_INDEX_PATH):
+        print("Loading FAISS index from cache...")
+        with open(FAISS_INDEX_PATH, "rb") as f:
+            return pickle.load(f)
+
+    print("Initializing FAISS from scratch...")
+    data = pd.read_csv(DATASET_PATH).dropna().head(500)  # Limit rows for speed
+
     # Create documents
     documents = [
         Document(
@@ -16,30 +27,35 @@ def initialize_system():
             metadata={"question": row['Question'], "answer": row['Answer']}
         ) for _, row in data.iterrows()
     ]
-
-    # Create vector store
-    embeddings = HuggingFaceEmbeddings(
-        model_name="sentence-transformers/multi-qa-mpnet-base-dot-v1"
-    )
-
-    return FAISS.from_documents(documents, embeddings)
+
+    # Load embedding model once
+    embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL)
+
+    # Create FAISS vector store
+    vector_store = FAISS.from_documents(documents, embeddings)
+
+    # Cache the FAISS index
+    with open(FAISS_INDEX_PATH, "wb") as f:
+        pickle.dump(vector_store, f)
+
+    return vector_store
 
 vector_store = initialize_system()
 
 def classify_question(query: str, k: int = 3):
-    # Retrieve similar Q&A pairs
     results = vector_store.similarity_search(query, k=k)
-
-    # Generate category from answers
+
+    if not results:
+        return {"Category": "Unknown", "Top Matches": "No matches found", "Confidence": "0%"}
+
     answers = " ".join([doc.metadata['answer'] for doc in results])
-    keywords = list(dict.fromkeys(answers.split()))[:5]
+    keywords = list(dict.fromkeys(answers.split()))[:5]  # Extract first 5 unique words
    category = " ".join(keywords)
-
-    # Format output
+
     return {
         "Category": category,
         "Top Matches": "\n\n".join([f"Q: {doc.metadata['question']}\nA: {doc.metadata['answer']}"
-                                    for doc in results]),
+                                    for doc in results]),
         "Confidence": f"{len(results)/k:.0%}"
     }
 
@@ -53,8 +69,8 @@ interface = gr.Interface(
         gr.Textbox(label="Confidence")
     ],
     title="Question Classification System",
-    description="Classify questions based on existing Q&A pairs using RAG"
+    description="Classify questions based on existing Q&A pairs using FAISS"
 )
 
 if __name__ == "__main__":
-    interface.launch()
+    interface.launch(share=True)
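
For reference, a minimal sketch of how the updated classify_question could be smoke-tested locally. This helper is not part of the commit; it assumes app.py and a qa_dataset.csv with Question and Answer columns sit in the working directory, and that importing app builds (or loads) the FAISS index as a side effect of the module-level initialize_system() call.

# smoke_test.py (hypothetical helper, not included in this commit)
# Importing app runs initialize_system(): it loads the pickled index from
# "faiss_index" if the file exists, otherwise it embeds up to 500 rows of
# qa_dataset.csv and writes the cache.
from app import classify_question

result = classify_question("How do I reset my password?", k=3)

print(result["Category"])     # first five unique words taken from the matched answers
print(result["Top Matches"])  # the k most similar Q&A pairs from the dataset
print(result["Confidence"])   # len(results)/k, i.e. "100%" whenever FAISS returns k hits

The example query is made up; any question resembling the dataset will do. Because the cache is a plain pickle of the vector store, deleting the faiss_index file forces a full rebuild on the next start.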