# Commit c4d9cb7 by amirulhazym
# Feat: Implement V2 API-driven conversational AI system using Gemini 2.5 Flash Lite model
# --- Full, Final, and Verified Code for v2_multilingual_api/backend/main.py ---
import os
import json # <-- FIX 1: Added the missing json import
from fastapi import FastAPI
from pydantic import BaseModel, Field
from dotenv import load_dotenv
from sentence_transformers import SentenceTransformer, CrossEncoder
from pinecone import Pinecone
import google.generativeai as genai
load_dotenv()
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
if not PINECONE_API_KEY or not GEMINI_API_KEY:
    raise ValueError("API keys not found. Check your .env file.")
print("Initializing multilingual models...")
retriever_model = SentenceTransformer('paraphrase-multilingual-mpnet-base-v2')
reranker_model = CrossEncoder('cross-encoder/mmarco-mMiniLMv2-L12-H384-v1')
print("Models initialized.")
print("Initializing services...")
pc = Pinecone(api_key=PINECONE_API_KEY)
index = pc.Index("auracart-multilingual-kb")
genai.configure(api_key=GEMINI_API_KEY)
generation_model = genai.GenerativeModel('gemini-2.5-flash-lite-preview-06-17')
print("Services initialized.")
app = FastAPI()

class Source(BaseModel):
    content: str
    source_file: str = Field(alias="source")  # The alias maps the 'source' key in the payload to this field

class QueryRequest(BaseModel):
    question: str
    history: list = []

class QueryResponse(BaseModel):
    answer: str
    sources: list[Source]

class SuggestionRequest(BaseModel):
    history: list

class SuggestionResponse(BaseModel):
    suggestions: list[str]
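
# A quick illustration of the alias behavior (a sketch, not executed by the app):
#   Source(content="...", source="faq.md").source_file == "faq.md"
# FastAPI serializes response models by alias by default, so clients still see a
# "source" key rather than "source_file" in the JSON payload.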
@app.post("/api/ask", response_model=QueryResponse)
def ask_question(request: QueryRequest):
# --- FIX 2: Corrected the retrieval and source handling logic ---
# 1. Retrieve
query_embedding = retriever_model.encode(request.question).tolist()
retrieved_docs = index.query(vector=query_embedding, top_k=20, include_metadata=True)
# Keep the full document objects, not just the text
original_docs = retrieved_docs['matches']
# 2. Re-rank
# Prepare pairs of (question, passage_text) for the reranker model
reranker_input_pairs = [(request.question, doc['metadata'].get('text', '')) for doc in original_docs]
rerank_scores = reranker_model.predict(reranker_input_pairs)
# Combine the original documents with their new scores and sort
ranked_docs = sorted(zip(rerank_scores, original_docs), reverse=True)
# 3. Prepare Final Context and Sources
# Select the top 4 most relevant documents after re-ranking
top_docs = [doc for score, doc in ranked_docs[:4]]
# Prepare the context string for the LLM prompt
context_str = "\n\n".join([doc['metadata'].get('text', '') for doc in top_docs])
# Prepare the source list for the final API response
sources_for_response = [
{
"content": doc['metadata'].get('text', ''),
"source": doc['metadata'].get('source', 'unknown')
}
for doc in top_docs
]
# --- END OF FIX 2 ---
history_str = "\n".join([f"User: {h.get('user', '')}\nBot: {h.get('bot', '')}" for h in request.history])
prompt = f"""
You are "Aura," AuraCart's professional and bilingual customer service AI, fluent in both English and Malay.
Your primary goal is to provide helpful and accurate answers based ONLY on the context provided from the knowledge base.
IMPORTANT: You MUST answer in the same language as the user's question. If the user asks in Malay, you must answer in Malay.
If the context does not contain the answer, clearly state that you don't have enough information in the user's language. Do not invent answers.
CONVERSATION HISTORY:
{history_str}
KNOWLEDGE BASE CONTEXT:
---
{context_str}
---
USER'S QUESTION:
{request.question}
YOUR HELPFUL, BILINGUAL ANSWER:
"""
response = generation_model.generate_content(prompt)
# Return the generated answer AND the list of sources used
return QueryResponse(answer=response.text, sources=sources_for_response)
@app.get("/")
def read_root():
return {"message": "AuraCart Multilingual AI Backend is ready."}
@app.post("/api/suggest_questions", response_model=SuggestionResponse)
def suggest_questions(request: SuggestionRequest):
history_str = "\n".join([f"User: {h.get('user', '')}\nBot: {h.get('bot', '')}" for h in request.history])
prompt = f"""
Based on the following conversation history between a user and an e-commerce chatbot, generate exactly three, concise, and highly relevant follow-up questions that the user might ask next.
The questions should be natural and continue the flow of the conversation.
Return ONLY a JSON list of strings. Do not add any other text.
Example Format:
["What are the shipping costs?", "Can I track my order?", "What is the return policy for electronics?"]
Conversation History:
---
{history_str}
---
JSON list of three suggested questions:
"""
response = generation_model.generate_content(prompt)
try:
clean_response = response.text.strip().replace("```json", "").replace("```", "")
suggestions = json.loads(clean_response)
except (json.JSONDecodeError, AttributeError):
suggestions = ["How do I return an item?", "What are the payment options?", "How can I track my order?"]
return SuggestionResponse(suggestions=suggestions)
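
# --- Optional local smoke test: a minimal sketch, not part of the original file. ---
# Assumptions: the server is normally launched with `uvicorn main:app --reload`,
# and `httpx` is installed (required by FastAPI's TestClient). The question below
# is purely illustrative. Note this exercises the live Pinecone and Gemini services.
if __name__ == "__main__":
    from fastapi.testclient import TestClient

    client = TestClient(app)
    # Send one in-process request to /api/ask and print the structured response
    result = client.post("/api/ask", json={"question": "What is the return policy?", "history": []})
    print(result.json())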