# app.py
import os
import warnings
from dotenv import load_dotenv
import gradio as gr
from qdrant_search import QdrantSearch
from langchain_groq import ChatGroq
from nomic_embeddings import EmbeddingsModel

from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel

# Load environment variables
load_dotenv()
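
# The .env file is expected to provide the following variables (a minimal sketch:
# QDRANT_CLOUD_URL and QDRANT_API_KEY are read explicitly below; GROQ_API_KEY is
# assumed here because langchain_groq's ChatGroq reads it by default):
#   GROQ_API_KEY=...
#   QDRANT_CLOUD_URL=...
#   QDRANT_API_KEY=...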

# Suppress FutureWarnings
warnings.filterwarnings("ignore", category=FutureWarning)

# Disable tokenizers parallelism
os.environ["TOKENIZERS_PARALLELISM"] = "FALSE"

# Initialize global variables
collection_names = ["docs_v1_2", "docs_v2_2", "docs_v3_2"]
limit = 5

# Initialize the language model
llm = ChatGroq(model="mixtral-8x7b-32768")

# Initialize the embeddings model
embeddings = EmbeddingsModel()

# Initialize Qdrant search with necessary credentials
search = QdrantSearch(
    qdrant_url=os.environ["QDRANT_CLOUD_URL"],
    api_key=os.environ["QDRANT_API_KEY"],
    embeddings=embeddings
)
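
# Note: query_multiple_collections(query, collection_names, limit) is expected to
# return a list of dicts, each carrying at least a "text" and a "source" key
# (inferred from its usage in chat_function below, not from the qdrant_search
# module itself), e.g.
#   [{"text": "LangChain provides ...", "source": "docs_v1_2/introduction"}, ...]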

def chat_function(question: str):
    """
    Handles the chat functionality by processing the user's question,
    retrieving relevant documents, generating an answer, and returning sources.

    Args:
        question (str): The user's question.

    Returns:
        Tuple[str, str]: The generated answer and the sources used.
    """
    query = question.strip()
    if not query:
        return "❌ **Error:** Query cannot be empty.", "No sources available."

    # Step 1: Retrieve relevant documents from Qdrant
    retrieved_docs = search.query_multiple_collections(query, collection_names, limit)

    if not retrieved_docs:
        return "⚠️ **No relevant documents found** for your query.", "No sources available."

    # Step 2: Prepare the context from retrieved documents
    context = "\n\n".join([doc['text'] for doc in retrieved_docs])

    # Step 3: Construct the prompt with context and question
    prompt = (
        "You are LangAssist, a knowledgeable assistant for the LangChain Python Library. "
        "Given the following context from the documentation, provide a helpful answer to the user's question.\n\n"
        "### Context:\n{context}\n\n"
        "### Question:\n{question}\n\n"
        "### Answer:"
    ).format(context=context, question=query)

    # Step 4: Generate an answer using the language model
    try:
        answer = llm.invoke(prompt)
    except Exception as e:
        return f"⚠️ **Error generating answer:** {str(e)}", "No sources available."

    # Prepare sources
    sources = "\n\n".join([
        f"**Source:** {doc['source']}\n**Excerpt:** {doc['text']}"
        for doc in retrieved_docs
    ])

    return answer.content.strip(), sources

# Define Pydantic model for request
class ChatRequest(BaseModel):
    question: str

# Initialize FastAPI app
app = FastAPI()

# Define allowed origins
origins = [
    "*",  # Allow all origins; for production, specify your frontend domains
    # Example:
    # "http://localhost",
    # "http://localhost:3000",
    # "https://your-frontend-domain.com",
]

# Add CORS middleware
app.add_middleware(
    CORSMiddleware,
    allow_origins=origins,            # Allows all origins. Replace "*" with specific domains in production.
    allow_credentials=True,
    allow_methods=["*"],              # Allows all HTTP methods.
    allow_headers=["*"],              # Allows all headers.
)

# Define API endpoint
@app.post("/api/chat")
async def api_chat(request: ChatRequest):
    try:
        answer, sources = chat_function(request.question)
        return {"answer": answer, "sources": sources}
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))

# Create Gradio Interface
with gr.Blocks() as demo:
    gr.Markdown("# 🗨️ LangAssist Chat")
    gr.Markdown("Ask questions about the LangChain Python Library and get answers based on the latest documentation.")

    with gr.Row():
        with gr.Column(scale=2):
            question_input = gr.Textbox(
                lines=2,
                placeholder="Type your question here...",
                label="Your Question"
            )
            submit_button = gr.Button("💬 Submit")
        with gr.Column(scale=3):
            answer_output = gr.Markdown("### Answer will appear here...")
            sources_output = gr.Markdown("### Sources will appear here...")

    submit_button.click(
        fn=chat_function,
        inputs=question_input,
        outputs=[answer_output, sources_output]
    )

    gr.Markdown("""
    ---
    ## 📡 API Endpoint

    You can access the API endpoint at `/api/chat`. For example, send a POST request to `http://localhost:8000/api/chat` with JSON body `{"question": "Your question here"}`.
    """)

# Mount Gradio app on FastAPI
app = gr.mount_gradio_app(app, demo, path="/gradio")

# To run, use: uvicorn app:app --host 0.0.0.0 --port 8000
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)
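
# Example client call (a sketch assuming the server is running locally on port 8000,
# as in the uvicorn command above; the question string is illustrative only):
#
#   import requests
#   resp = requests.post(
#       "http://localhost:8000/api/chat",
#       json={"question": "How do I use a custom prompt template?"},
#       timeout=60,
#   )
#   payload = resp.json()
#   print(payload["answer"])
#   print(payload["sources"])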