"""Gradio app that labels a question using its nearest Q&A pairs in a FAISS index."""

import pandas as pd
import gradio as gr
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.schema import Document

DEFAULT_DATASET_PATH = "qa_dataset.csv"
DEFAULT_EMBEDDING_MODEL = "sentence-transformers/multi-qa-mpnet-base-dot-v1"
# Number of leading distinct answer words used to form the category label.
CATEGORY_KEYWORD_COUNT = 5


def initialize_system(
    csv_path: str = DEFAULT_DATASET_PATH,
    model_name: str = DEFAULT_EMBEDDING_MODEL,
):
    """Build the FAISS vector store from a CSV of question/answer pairs.

    Args:
        csv_path: Path to a CSV file with ``Question`` and ``Answer`` columns.
        model_name: Name of the HuggingFace embedding model to load.

    Returns:
        A FAISS vector store with one document per usable CSV row. Each
        document's metadata holds the row's ``question`` and ``answer``.

    Raises:
        KeyError: If the CSV lacks a ``Question`` or ``Answer`` column.
        ValueError: If no row has both a question and an answer.
    """
    data = pd.read_csv(csv_path)

    # Missing cells are read as float NaN. Left in place they would be
    # embedded as the text "nan" and later break the string join in
    # classify_question, so incomplete rows are dropped up front.
    data = data.dropna(subset=["Question", "Answer"])
    if data.empty:
        raise ValueError(
            f"No complete Question/Answer rows found in {csv_path!r}"
        )

    documents = []
    for question, answer in zip(data["Question"], data["Answer"]):
        # Numeric-looking cells are parsed as numbers by pandas; normalise
        # to text so downstream string handling is safe.
        question, answer = str(question), str(answer)
        documents.append(
            Document(
                page_content=f"Q: {question}\nA: {answer}",
                metadata={"question": question, "answer": answer},
            )
        )

    embeddings = HuggingFaceEmbeddings(model_name=model_name)
    return FAISS.from_documents(documents, embeddings)


# Built once at import time so every request reuses the same index.
vector_store = initialize_system()


def classify_question(query: str, k: int = 3) -> dict:
    """Derive a category for *query* from its most similar stored Q&A pairs.

    Args:
        query: The question text to classify.
        k: How many similar Q&A pairs to request from the vector store.

    Returns:
        A dict with three string values:
        ``"Category"`` - the first five distinct whitespace-separated words
        of the matched answers, joined by spaces;
        ``"Top Matches"`` - the matched pairs formatted as ``Q: ...\\nA: ...``
        and separated by blank lines;
        ``"Confidence"`` - the number of matches returned divided by *k*,
        formatted as a whole percentage.

    Raises:
        ValueError: If *k* is less than 1.
    """
    if k < 1:
        raise ValueError(f"k must be a positive integer, got {k!r}")

    # A blank query has nothing meaningful to embed; report "no matches"
    # instead of returning arbitrary neighbours.
    if not query or not query.strip():
        return {"Category": "", "Top Matches": "", "Confidence": f"{0:.0%}"}

    results = vector_store.similarity_search(query, k=k)

    # dict.fromkeys de-duplicates words while preserving first-seen order.
    answer_words = " ".join(doc.metadata["answer"] for doc in results).split()
    keywords = list(dict.fromkeys(answer_words))[:CATEGORY_KEYWORD_COUNT]

    top_matches = "\n\n".join(
        f"Q: {doc.metadata['question']}\nA: {doc.metadata['answer']}"
        for doc in results
    )

    return {
        "Category": " ".join(keywords),
        "Top Matches": top_matches,
        "Confidence": f"{len(results) / k:.0%}",
    }


def _classify_for_ui(question: str) -> tuple:
    """Adapt classify_question to the Gradio interface's three outputs.

    Gradio expects one return value per output component, in order; a dict
    keyed by plain strings is not accepted. The values are therefore
    unpacked into a tuple matching the order of ``outputs`` below.
    """
    result = classify_question(question, 3)
    return result["Category"], result["Top Matches"], result["Confidence"]


# Gradio interface
interface = gr.Interface(
    fn=_classify_for_ui,
    inputs=gr.Textbox(
        label="Input Question",
        placeholder="Type your question here...",
    ),
    outputs=[
        gr.Textbox(label="Predicted Category"),
        gr.Textbox(label="Supporting Q&A"),
        gr.Textbox(label="Confidence"),
    ],
    title="Question Classification System",
    description="Classify questions based on existing Q&A pairs using RAG",
)

if __name__ == "__main__":
    interface.launch()