Spaces:

Sarath0x8f
/

Epic-Minds

Runtime error

App Files Files Community

Sarath0x8f committed on May 20

Commit

f32ea1e

verified ·

1 Parent(s): baab591

Upload 2 files

Browse files

Files changed (2) hide show

app.py +159 -139
markdown.py +153 -42

app.py CHANGED Viewed

@@ -1,140 +1,160 @@
-import gradio as gr
-import pymongo
-import certifi
-from llama_index.core import VectorStoreIndex
-from llama_index.embeddings.huggingface import HuggingFaceEmbedding
-from llama_index.llms.groq import Groq
-from llama_index.vector_stores.mongodb import MongoDBAtlasVectorSearch
-from llama_index.core.prompts import PromptTemplate
-from dotenv import load_dotenv
-import os
-import base64
-import markdown as md
-from datetime import datetime
-# Load environment variables
-load_dotenv()
-# --- MongoDB Config ---
-ATLAS_CONNECTION_STRING = os.getenv("ATLAS_CONNECTION_STRING")
-DB_NAME = "RAG"
-COLLECTION_NAME = "ramayana"
-VECTOR_INDEX_NAME = "ramayana_vector_index"
-# --- Embedding Model ---
-embed_model = HuggingFaceEmbedding(model_name="intfloat/multilingual-e5-base")
-# --- Prompt Template ---
-ramayana_qa_template = PromptTemplate(
-    """You are an expert on the Valmiki Ramayana and a guide who always inspires people with the great Itihasa like the Ramayana.
-    Below is text from the epic, including shlokas and their explanations:
-    ---------------------
-    {context_str}
-    ---------------------
-    Using only this information, answer the following query.
-    Query: {query_str}
-    Answer:
-     - Intro or general description to ```Query```
-     - Related shloka/shlokas followed by its explanation
-     - Overview of ```Query```
-    """
-)
-# --- Connect to MongoDB once at startup ---
-def get_vector_index_once():
-    mongo_client = pymongo.MongoClient(
-        ATLAS_CONNECTION_STRING,
-        tlsCAFile=certifi.where(),
-        tlsAllowInvalidCertificates=False,
-        connectTimeoutMS=30000,
-        serverSelectionTimeoutMS=30000,
-    )
-    mongo_client.server_info()
-    print("✅ Connected to MongoDB Atlas.")
-    vector_store = MongoDBAtlasVectorSearch(
-        mongo_client,
-        db_name=DB_NAME,
-        collection_name=COLLECTION_NAME,
-        vector_index_name=VECTOR_INDEX_NAME,
-    )
-    return VectorStoreIndex.from_vector_store(vector_store, embed_model=embed_model)
-# Connect once
-vector_index = get_vector_index_once()
-# --- Respond Function (uses API key from state) ---
-def chat_with_groq(message, history, groq_key):
-    llm = Groq(model="llama-3.1-8b-instant", api_key=groq_key)
-    query_engine = vector_index.as_query_engine(
-        llm=llm,
-        text_qa_template=ramayana_qa_template,
-        similarity_top_k=5,
-        verbose=True,
-    )
-    response = query_engine.query(message)
-    print(f"\n{datetime.now()}:: {message} --> {str(response)}\n")
-    return str(response)
-def encode_image(image_path):
-    with open(image_path, "rb") as image_file:
-        return base64.b64encode(image_file.read()).decode('utf-8')
-# Encode the images
-github_logo_encoded = encode_image("Images/github-logo.png")
-linkedin_logo_encoded = encode_image("Images/linkedin-logo.png")
-website_logo_encoded = encode_image("Images/ai-logo.png")
-# --- Gradio UI ---
-with gr.Blocks(theme=gr.themes.Soft(font=[gr.themes.GoogleFont("Roboto Mono")]), css='footer {visibility: hidden}') as demo:
-    with gr.Tabs():
-        with gr.TabItem("Intro"):
-            gr.Markdown(md.description)
-        with gr.TabItem("GPT"):
-            with gr.Column(visible=True) as accordion_container:
-                with gr.Accordion("How to get Groq API KEY", open=False):
-                    gr.Markdown(md.groq_api_key)
-            groq_key_box = gr.Textbox(
-                    label="Enter Groq API Key",
-                    type="password",
-                    placeholder="Paste your Groq API key here..."
-                )
-            start_btn = gr.Button("Start Chat")
-            groq_state = gr.State(value="")
-            # Chat container, initially hidden
-            with gr.Column(visible=False) as chatbot_container:
-                chatbot = gr.ChatInterface(
-                    fn=lambda message, history, groq_key: chat_with_groq(message, history, groq_key),
-                    additional_inputs=[groq_state],
-                    chatbot=gr.Chatbot(height=500),
-                    title="🕉️ RamayanaGPT",
-                    show_progress="full",
-                    fill_height=True,
-                    # description="Ask questions from the Valmiki Ramayana. Powered by RAG + MongoDB + LlamaIndex.",
-                )
-            # Show chat and hide inputs
-            def save_key_and_show_chat(key):
-                print(f"key: {key}")
-                return key, gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=True)
-            start_btn.click(
-                fn=save_key_and_show_chat,
-                inputs=[groq_key_box],
-                outputs=[groq_state, groq_key_box, start_btn, accordion_container, chatbot_container]
-            )
-        gr.HTML(md.footer.format(github_logo_encoded, linkedin_logo_encoded, website_logo_encoded))
-if __name__ == "__main__":
     demo.launch()

+import gradio as gr
+import pymongo
+import certifi
+from llama_index.core import VectorStoreIndex
+from llama_index.embeddings.huggingface import HuggingFaceEmbedding
+from llama_index.llms.groq import Groq
+from llama_index.vector_stores.mongodb import MongoDBAtlasVectorSearch
+from llama_index.core.prompts import PromptTemplate
+from dotenv import load_dotenv
+import os
+import base64
+import markdown as md
+from datetime import datetime
+# Load environment variables
+load_dotenv()
+# --- Embedding Model ---
+embed_model = HuggingFaceEmbedding(model_name="intfloat/multilingual-e5-base")
+# --- Prompt Template ---
+ramayana_qa_template = PromptTemplate(
+    """You are an expert on the Valmiki Ramayana and a guide who always inspires people with the great Itihasa like the Ramayana.
+    Below is text from the epic, including shlokas and their explanations:
+    ---------------------
+    {context_str}
+    ---------------------
+    Using only this information, answer the following query.
+    Query: {query_str}
+    Answer:
+     - Intro or general description to ```Query```
+     - Related shloka/shlokas followed by its explanation
+     - Overview of ```Query```"""
+)
+gita_qa_template = PromptTemplate(
+    """You are an expert on the Bhagavad Gita and a spiritual guide.
+    Below is text from the scripture, including verses and their explanations:
+    ---------------------
+    {context_str}
+    ---------------------
+    Using only this information, answer the following query.
+    Query: {query_str}
+    Answer:
+     - Intro or context about the topic
+     - Relevant verse(s) with explanation
+     - Conclusion or reflection"""
+)
+# --- Build a vector index for one MongoDB Atlas collection (opens a new client per call) ---
+def get_vector_index(db_name, collection_name, vector_index_name):
+    mongo_client = pymongo.MongoClient(
+        os.getenv("ATLAS_CONNECTION_STRING"),
+        tlsCAFile=certifi.where(),
+        tlsAllowInvalidCertificates=False,
+        connectTimeoutMS=30000,
+        serverSelectionTimeoutMS=30000,
+    )
+    mongo_client.server_info()
+    print(f"✅ Connected to MongoDB Atlas for collection: {collection_name}")
+    vector_store = MongoDBAtlasVectorSearch(
+        mongo_client,
+        db_name=db_name,
+        collection_name=collection_name,
+        vector_index_name=vector_index_name,
+    )
+    return VectorStoreIndex.from_vector_store(vector_store, embed_model=embed_model)
+# --- Respond function factory (the returned fn uses the API key from state) ---
+def chat_with_groq(index, template):
+    def fn(message, history, groq_key):
+        if not groq_key or not groq_key.startswith("gsk_"):
+            return "❌ Invalid Groq API Key. Please enter a valid key."
+        llm = Groq(model="llama-3.1-8b-instant", api_key=groq_key)
+        query_engine = index.as_query_engine(
+            llm=llm,
+            text_qa_template=template,
+            similarity_top_k=5,
+            verbose=True,
+        )
+        response = query_engine.query(message)
+        print(f"\n{datetime.now()}:: {message} --> {str(response)}\n")
+        return str(response)
+    return fn
+# Load vector indices once
+ramayana_index = get_vector_index("RAG", "ramayana", "ramayana_vector_index")
+gita_index = get_vector_index("RAG", "bhagavad_gita", "gita_vector_index")
+# Encode logos
+def encode_image(image_path):
+    with open(image_path, "rb") as image_file:
+        return base64.b64encode(image_file.read()).decode("utf-8")
+github_logo_encoded = encode_image("Images/github-logo.png")
+linkedin_logo_encoded = encode_image("Images/linkedin-logo.png")
+website_logo_encoded = encode_image("Images/ai-logo.png")
+# --- Gradio UI ---
+with gr.Blocks(theme=gr.themes.Soft(font=[gr.themes.GoogleFont("Roboto Mono")]), css='footer {visibility: hidden}') as demo:
+    with gr.Tabs():
+        with gr.TabItem("Intro"):
+            gr.Markdown(md.description)
+        def create_tab(tab_title, chatbot_title, vector_index, template, intro):
+            with gr.TabItem(tab_title):
+                with gr.Column(visible=True) as accordion_container:
+                    with gr.Accordion("How to get Groq API KEY", open=False):
+                        gr.Markdown(md.groq_api_key)
+                groq_key_box = gr.Textbox(
+                    label="Enter Groq API Key",
+                    type="password",
+                    placeholder="Paste your Groq API key here..."
+                )
+                start_btn = gr.Button("Start Chat")
+                groq_state = gr.State(value="")
+                with gr.Column(visible=False) as chatbot_container:
+                    with gr.Accordion("Overview & Summary", open=False):
+                        gr.Markdown(intro)
+                    chatbot = gr.ChatInterface(
+                        fn=chat_with_groq(vector_index, template),
+                        additional_inputs=[groq_state],
+                        chatbot=gr.Chatbot(height=500),
+                        title=chatbot_title,
+                        show_progress="full",
+                        fill_height=True,
+                    )
+                def save_key_and_show_chat(key):
+                    if key and key.startswith("gsk_"):
+                        return key, gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=True)
+                    else:
+                        return "", gr.update(visible=True), gr.update(visible=True), gr.update(visible=True), gr.update(visible=False)
+                start_btn.click(
+                    fn=save_key_and_show_chat,
+                    inputs=[groq_key_box],
+                    outputs=[groq_state, groq_key_box, start_btn, accordion_container, chatbot_container]
+                )
+        create_tab("RamayanaGPT", "🕉️ RamayanaGPT", ramayana_index, ramayana_qa_template, md.RamayanaGPT)
+        create_tab("GitaGPT", "🕉️ GitaGPT", gita_index, gita_qa_template, md.GitaGPT)
+        gr.HTML(md.footer.format(github_logo_encoded, linkedin_logo_encoded, website_logo_encoded))
+if __name__ == "__main__":
     demo.launch()

markdown.py CHANGED Viewed

@@ -1,11 +1,19 @@
 description = """
-## 🕉️ **Project Title: RamayanaGPT – A RAG-based Chatbot for Valmiki Ramayana**
 ---
 ### 🔍 **Project Overview**
-**RamayanaGPT** is a knowledge-based conversational chatbot designed to answer questions from the *Valmiki Ramayana*. It leverages advanced **Retrieval-Augmented Generation (RAG)** principles to provide accurate and context-rich responses by referencing canonical verses (*shlokas*) and their explanations. This project integrates **MongoDB Atlas**, **LlamaIndex**, **Groq LLM**, and **Gradio UI**, offering an intuitive and scholarly digital assistant for users curious about the ancient epic.
 ---
@@ -13,72 +21,99 @@ description = """
 #### 1. **Vector Store: MongoDB Atlas**
-* The *Valmiki Ramayana* dataset is stored in a MongoDB Atlas collection.
-* Each document consists of metadata fields: `kanda`, `sarga`, `shloka`, and `shloka_text`, with an `explanation` used for semantic retrieval.
-* MongoDB Atlas Vector Search is configured using `MongoDBAtlasVectorSearch` for efficient similarity-based queries.
-#### 2. **Embeddings: Hugging Face**
-* Embedding model: `intfloat/multilingual-e5-base` from HuggingFace.
-* Converts shloka and explanation texts into vector representations for similarity search.
 #### 3. **Language Model: Groq API**
-* Model used: `llama-3.1-8b-instant`.
-* API key is provided at runtime by the user.
-* Integrates with the query engine to synthesize responses based on context-relevant documents.
 #### 4. **Prompt Engineering**
-* A custom `PromptTemplate` guides the LLM to:
-  * Provide an introduction.
-  * Quote relevant shlokas.
-  * Explain them.
-  * Give a closing summary relevant to the query.
-* Prompt ensures scholarly tone and contextual accuracy.
-#### 5. **Vector Index**
-* Built once during app startup using `VectorStoreIndex.from_vector_store()`.
-* Shared across user queries to prevent repeated MongoDB connections (for efficiency and speed).
 #### 6. **User Interface: Gradio**
-* Tabbed interface using `gr.Blocks` with a clean `Soft` theme and Google Fonts.
-* Users input their Groq API key.
-* After key submission:
-  * API key input and button are hidden.
-  * Chat interface is shown using `gr.ChatInterface`.
-* Uses `gr.State` to hold the Groq API key during the session.
 ---
 ### ⚙️ **Technical Stack**
-| Component       | Technology                          |
-| --------------- | ----------------------------------- |
-| Backend LLM     | Groq (LLaMA 3.1 8B via API)         |
-| Embedding Model | Hugging Face (multilingual-e5-base) |
-| Vector Store    | MongoDB Atlas Vector Search         |
-| Query Engine    | LlamaIndex                          |
-| Prompt Engine   | LlamaIndex PromptTemplate           |
-| UI Framework    | Gradio (Blocks + ChatInterface)     |
-| Deployment      | Python app using `app.py`           |
 ---
 ### ✅ **Features Implemented**
-* [x] Connection to MongoDB Atlas (once during app startup).
-* [x] API key input and secure state handling using `gr.State`.
-* [x] Vector search over embedded shloka data.
-* [x] Chat interface with dynamic UI (hides API key and button post-auth).
-* [x] RAG-based responses tailored to Valmiki Ramayana structure.
-* [x] Modular, clean design for future extensibility (e.g., Bhagavad Gita, Mahabharata).
----
 """
 groq_api_key = """
@@ -96,6 +131,82 @@ groq_api_key = """
 ⚠️ **Don't share** your API key. Revoke and regenerate if needed.
 """
 footer = """
 <div style="background-color: #1d2938; color: white; padding: 10px; width: 100%; bottom: 0; left: 0; display: flex; justify-content: space-between; align-items: center; padding: .2rem 35px; box-sizing: border-box; font-size: 16px;">
     <div style="text-align: left;">

 description = """
+## 🕉️ **Project Title: RamayanaGPT & GitaGPT – RAG-based Chatbots for Ancient Indian Epics**
 ---
 ### 🔍 **Project Overview**
+**RamayanaGPT** and **GitaGPT** are knowledge-based conversational AI tools designed to answer questions from the *Valmiki Ramayana* and the *Bhagavad Gita*, respectively. These chatbots use **Retrieval-Augmented Generation (RAG)** architecture to generate accurate, scripture-based responses. They combine powerful **vector search capabilities** with **large language models (LLMs)** to deliver spiritually insightful, context-rich conversations.
+These tools leverage:
+* **MongoDB Atlas Vector Search** for embedding-based document retrieval
+* **Hugging Face** embeddings (`intfloat/multilingual-e5-base`)
+* **Groq LLaMA 3.1 8B** via API
+* **LlamaIndex** for orchestration
+* **Gradio** for user interface
 ---
 #### 1. **Vector Store: MongoDB Atlas**
+* Two collections are created in the `RAG` database:
+  * `ramayana` for **Valmiki Ramayana**
+  * `bhagavad_gita` for **Bhagavad Gita**
+* Each collection contains vector indexes:
+  * `ramayana_vector_index`
+  * `gita_vector_index`
+* Each document includes:
+  * For Ramayana: `kanda`, `sarga`, `shloka`, `shloka_text`, and `explanation`
+  * For Gita: `Title`, `Chapter`, `Verse`, and `explanation`
+#### 2. **Vector Embedding: Hugging Face**
+* Model: `intfloat/multilingual-e5-base`
+* Used to convert `shloka_text + explanation` or `verse + explanation` into vector representations
+* These embeddings are indexed into MongoDB for semantic similarity search
 #### 3. **Language Model: Groq API**
+* LLM used: `llama-3.1-8b-instant` via **Groq API**
+* Users input their Groq API key securely
+* LLM is instantiated per query using `llama_index.llms.groq.Groq`
 #### 4. **Prompt Engineering**
+* Custom **PromptTemplates** guide the response structure for each chatbot
+* **RamayanaGPT Prompt**:
+  * Introduction to the query
+  * Related shlokas with explanations
+  * Summary/overview
+* **GitaGPT Prompt**:
+  * Context or spiritual background
+  * Relevant verse(s) with meaning
+  * Reflective conclusion
+#### 5. **Index Initialization**
+* Vector indexes are loaded **once** at application startup:
+  ```python
+  ramayana_index = get_vector_index("RAG", "ramayana", "ramayana_vector_index")
+  gita_index = get_vector_index("RAG", "bhagavad_gita", "gita_vector_index")
+  ```
+* Shared across all user queries for speed and efficiency
 #### 6. **User Interface: Gradio**
+* Built with `gr.Blocks` using the `Soft` theme and `Roboto Mono` font
+* Two tabs:
+  * 🕉️ **RamayanaGPT**
+  * 🕉️ **GitaGPT**
+* Users enter their Groq API key in each tab; it's stored in that tab's `gr.State`
+* Upon authentication:
+  * API key input, **Start Chat** button, and help accordion are hidden
+  * Full chat interface is revealed (`gr.ChatInterface`)
 ---
 ### ⚙️ **Technical Stack**
+| Component       | Technology                            |
+| --------------- | ------------------------------------- |
+| Backend LLM     | Groq (LLaMA 3.1 8B via API)           |
+| Embedding Model | Hugging Face (`multilingual-e5-base`) |
+| Vector Store    | MongoDB Atlas Vector Search           |
+| Vector Engine   | LlamaIndex VectorStoreIndex           |
+| Prompt Engine   | LlamaIndex PromptTemplate             |
+| Query Engine    | LlamaIndex Query Engine               |
+| UI Framework    | Gradio (Blocks + ChatInterface)       |
+| Deployment      | Python app using `app.py`             |
 ---
 ### ✅ **Features Implemented**
+* [x] Vector search using MongoDB Atlas
+  * `ramayana_vector_index` for Valmiki Ramayana
+  * `gita_vector_index` for Bhagavad Gita
+* [x] Hugging Face embedding (`e5-base`) integration
+* [x] API key input and session handling with `gr.State`
+* [x] LLM integration via Groq API
+* [x] Prompt templates customized for each scripture
+* [x] Tabbed interface for seamless switching between RamayanaGPT and GitaGPT
+* [x] Clean UX with collapsible Groq API key instructions
+* [x] Logging of each query with timestamp (for debugging/monitoring)
 """
 groq_api_key = """
 ⚠️ **Don't share** your API key. Revoke and regenerate if needed.
 """
+RamayanaGPT='''
+## 🕉️ **RamayanaGPT – Overview and Dataset Summary**
+### 📖 **Introduction**
+**RamayanaGPT** is a RAG-based chatbot that draws upon the **Valmiki Ramayana**, the original Sanskrit epic, to answer user queries with reference to shlokas and their commentaries. It aims to offer precise, contextual, and respectful responses using advanced retrieval and generation technologies.
+### 🗂️ **Dataset Structure**
+The uploaded Ramayana dataset includes the following columns:
+| Column        | Description                                                                    |
+| ------------- | ------------------------------------------------------------------------------ |
+| `kanda`       | One of the 7 books (kandas) of the Ramayana (e.g., Bala Kanda, Ayodhya Kanda). |
+| `sarga`       | The chapter number within each kanda.                                          |
+| `shloka`      | The shloka (verse) number within the sarga.                                    |
+| `shloka_text` | Original Sanskrit verse.                                                       |
+| `explanation` | English explanation or interpretation of the shloka.                           |
+### 🔍 **Example**
+```text
+kanda: Bala Kanda
+sarga: 1
+shloka: 1
+shloka_text: तपस्स्वाध्यायनिरतं तपस्वी वाग्विदां वरम् ।
+explanation: Ascetic Valmiki enquired of Narada, preeminent among sages, who was engaged in penance and study of the Vedas.
+```
+### 💡 **Insights**
+* The data is well-structured, with **1,400+** records.
+* Each record reflects a deep philosophical or narrative moment from the epic.
+* Metadata (`kanda`, `sarga`, `shloka`) allows precise retrieval and organization.
+* Used for vector indexing and semantic retrieval.
+'''
+GitaGPT='''
+## 🕉️ **GitaGPT – Overview and Dataset Summary**
+### 📖 **Introduction**
+**GitaGPT** is a chatbot built to answer spiritual and philosophical questions using the **Bhagavad Gita** as its primary source. It references verses (slokas) directly from the Gita, delivering insights supported by Sanskrit, Hindi, and English explanations.
+### 🗂️ **Dataset Structure**
+The uploaded Gita dataset contains the following fields:
+| Column                | Description                                         |
+| --------------------- | --------------------------------------------------- |
+| `S.No.`               | Serial number of the verse.                         |
+| `Title`               | Title of the chapter (e.g., Arjuna's Vishada Yoga). |
+| `Chapter`             | Gita chapter number (e.g., Chapter 1).              |
+| `Verse`               | Verse ID (e.g., Verse 1.1).                         |
+| `Sanskrit Anuvad`     | Original verse in Devanagari Sanskrit.              |
+| `Hindi Anuvad`        | Hindi translation/interpretation.                   |
+| `Enlgish Translation` | English translation/interpretation.                 |
+### 🔍 **Example**
+```text
+Chapter: Chapter 1
+Verse: Verse 1.1
+Sanskrit: धृतराष्ट्र उवाच । धर्मक्षेत्रे कुरुक्षेत्रे समवेता युयुत्सवः...
+Hindi: धृतराष्ट्र बोले- हे संजय! धर्मभूमि कुरुक्षेत्र में एकत्र हुए युद्ध की इच्छा रखने वाले...
+English: Dhrtarashtra asked of Sanjaya: O SANJAYA, what did my sons and the sons of Pandu do?
+```
+### 💡 **Insights**
+* The dataset contains **700+ verses** from all 18 chapters.
+* Multilingual representation (Sanskrit, Hindi, English) enhances usability for diverse users.
+* The verse structure (`Chapter`, `Verse`) aids in precise referencing and response generation.
+* Perfectly suited for semantic search via vector embeddings.
+'''
 footer = """
 <div style="background-color: #1d2938; color: white; padding: 10px; width: 100%; bottom: 0; left: 0; display: flex; justify-content: space-between; align-items: center; padding: .2rem 35px; box-sizing: border-box; font-size: 16px;">
     <div style="text-align: left;">