|
import pandas as pd |
|
import gradio as gr |
|
from langchain.vectorstores import FAISS |
|
from langchain.embeddings import HuggingFaceEmbeddings |
|
from langchain.schema import Document |
|
|
|
|
|
def initialize_system():
    """Build the FAISS vector store from the Q&A pairs in ``qa_dataset.csv``.

    The CSV is read from the current working directory and must provide
    ``Question`` and ``Answer`` columns. Each complete row becomes one
    ``Document`` whose text is ``"Q: <question>\\nA: <answer>"`` and whose
    metadata keeps the question and answer as separate strings.

    Returns:
        The vector store produced by ``FAISS.from_documents`` using the
        ``sentence-transformers/multi-qa-mpnet-base-dot-v1`` embeddings.

    Raises:
        KeyError: If the CSV lacks a ``Question`` or ``Answer`` column.
        ValueError: If no row has both a question and an answer.
    """
    data = pd.read_csv("qa_dataset.csv")

    # Rows missing either field would otherwise be indexed as the text "nan"
    # and carry a float NaN in their metadata, which breaks the string join
    # performed on the answers at query time.
    complete_rows = data.dropna(subset=["Question", "Answer"])
    if complete_rows.empty:
        raise ValueError(
            "qa_dataset.csv contains no rows with both a Question and an Answer"
        )

    # Cast to str so cells pandas parsed as numbers still yield string metadata.
    questions = complete_rows["Question"].astype(str)
    answers = complete_rows["Answer"].astype(str)

    documents = [
        Document(
            page_content=f"Q: {question}\nA: {answer}",
            metadata={"question": question, "answer": answer},
        )
        for question, answer in zip(questions, answers)
    ]

    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/multi-qa-mpnet-base-dot-v1"
    )

    return FAISS.from_documents(documents, embeddings)
|
|
|
# Build the vector store once, when the module is loaded; classify_question
# reads this module-level object on every call.
vector_store = initialize_system()
|
|
|
def classify_question(query: str, k: int = 3):
    """Look up the stored Q&A pairs closest to ``query`` and summarise them.

    Args:
        query: The question text to search for.
        k: How many nearest documents to request from the vector store.

    Returns:
        A dict with three string entries:
        ``"Category"`` - the first five distinct whitespace-separated words
        found in the matched answers, joined by spaces;
        ``"Top Matches"`` - the matched pairs rendered as ``Q: ... / A: ...``
        blocks separated by blank lines;
        ``"Confidence"`` - the number of matches returned divided by ``k``,
        formatted as a whole percentage (not a similarity score).
    """
    matches = vector_store.similarity_search(query, k=k)

    # Walk the answer words in match order, keeping each word's first
    # occurrence only and stopping once five have been collected.
    combined_answers = " ".join(match.metadata['answer'] for match in matches)
    seen_words = set()
    leading_words = []
    for word in combined_answers.split():
        if word in seen_words:
            continue
        seen_words.add(word)
        leading_words.append(word)
        if len(leading_words) == 5:
            break

    rendered_pairs = []
    for match in matches:
        rendered_pairs.append(
            f"Q: {match.metadata['question']}\nA: {match.metadata['answer']}"
        )

    summary = {}
    summary["Category"] = " ".join(leading_words)
    summary["Top Matches"] = "\n\n".join(rendered_pairs)
    summary["Confidence"] = format(len(matches) / k, ".0%")
    return summary
|
|
|
|
|
def _classify_for_ui(question):
    """Adapt classify_question's dict result to the interface's three outputs.

    gr.Interface assigns return values to its output components by position,
    so the dict is unpacked into a tuple in the same order as the textboxes
    declared below: category, supporting Q&A, confidence.
    """
    result = classify_question(question, 3)
    return result["Category"], result["Top Matches"], result["Confidence"]


# Web UI: one question textbox in, three read-only textboxes out.
interface = gr.Interface(
    fn=_classify_for_ui,
    inputs=gr.Textbox(label="Input Question", placeholder="Type your question here..."),
    outputs=[
        gr.Textbox(label="Predicted Category"),
        gr.Textbox(label="Supporting Q&A"),
        gr.Textbox(label="Confidence"),
    ],
    title="Question Classification System",
    description="Classify questions based on existing Q&A pairs using RAG",
)
|
|
|
# Start the Gradio app only when this file is executed as a script.
if __name__ == "__main__":
    interface.launch()