Sarath0x8f committed
Commit 32ca645 · verified · 1 Parent(s): f85cde7

Upload 2 files

Files changed (2):
  1. app.py +45 -67
  2. markdown.py +54 -57
app.py CHANGED
@@ -3,7 +3,7 @@ import pymongo
  import certifi
  from llama_index.core import VectorStoreIndex
  from llama_index.embeddings.huggingface import HuggingFaceEmbedding
- from llama_index.llms.groq import Groq
  from llama_index.vector_stores.mongodb import MongoDBAtlasVectorSearch
  from llama_index.core.prompts import PromptTemplate
  from dotenv import load_dotenv
@@ -18,7 +18,7 @@ load_dotenv()
  # --- Embedding Model ---
  embed_model = HuggingFaceEmbedding(model_name="intfloat/multilingual-e5-base")

- # --- Prompt Template ---
  ramayana_qa_template = PromptTemplate(
  """You are an expert on the Valmiki Ramayana and a guide who always inspires people with the great Itihasa like the Ramayana.

@@ -33,7 +33,7 @@ ramayana_qa_template = PromptTemplate(

  Answer:
  - Intro or general description to ```Query```
- - Related shloka/shlokas followed by its explanation
  - Overview of ```Query```"""
  )

@@ -51,18 +51,15 @@ gita_qa_template = PromptTemplate(

  Answer:
  - Intro or context about the topic
- - Relevant verse(s) with explanation
  - Conclusion or reflection"""
  )

- # --- Connect to MongoDB once at startup ---
  def get_vector_index(db_name, collection_name, vector_index_name):
      mongo_client = pymongo.MongoClient(
          os.getenv("ATLAS_CONNECTION_STRING"),
          tlsCAFile=certifi.where(),
-         tlsAllowInvalidCertificates=False,
-         connectTimeoutMS=30000,
-         serverSelectionTimeoutMS=30000,
      )
      mongo_client.server_info()
      print(f"✅ Connected to MongoDB Atlas for collection: {collection_name}")
@@ -75,29 +72,38 @@ def get_vector_index(db_name, collection_name, vector_index_name):
      )
      return VectorStoreIndex.from_vector_store(vector_store, embed_model=embed_model)

- # --- Respond Function (uses API key from state) ---
- def chat_with_groq(index, template):
-     def fn(message, history, groq_key):
-         if not groq_key or not groq_key.startswith("gsk_"):
-             return "❌ Invalid Groq API Key. Please enter a valid key."
-         llm = Groq(model="llama-3.1-8b-instant", api_key=groq_key)
-         query_engine = index.as_query_engine(
-             llm=llm,
-             text_qa_template=template,
-             similarity_top_k=5,
-             verbose=True,
-         )
-         response = query_engine.query(message)
-         print(f"\n{datetime.now()}:: {message} --> {str(response)}\n")
-         return str(response)
-     return fn
-
- # Load vector indices once
  ramayana_index = get_vector_index("RAG", "ramayana", "ramayana_vector_index")
  gita_index = get_vector_index("RAG", "bhagavad_gita", "gita_vector_index")

- # Encode logos

  def encode_image(image_path):
      with open(image_path, "rb") as image_file:
          return base64.b64encode(image_file.read()).decode("utf-8")
@@ -107,54 +113,26 @@ linkedin_logo_encoded = encode_image("Images/linkedin-logo.png")
107
  website_logo_encoded = encode_image("Images/ai-logo.png")
108
 
109
  # --- Gradio UI ---
110
- with gr.Blocks(theme=gr.themes.Soft(font=[gr.themes.GoogleFont("Roboto Mono")]), css='footer {visibility: hidden}') as demo:
111
  with gr.Tabs():
112
  with gr.TabItem("Intro"):
113
  gr.Markdown(md.description)
114
 
115
- def create_tab(tab_title, chatbot_title, vector_index, template, intro):
116
  with gr.TabItem(tab_title):
117
- with gr.Column(visible=True) as accordion_container:
118
- with gr.Accordion("How to get Groq API KEY", open=False):
119
- gr.Markdown(md.groq_api_key)
120
-
121
- groq_key_box = gr.Textbox(
122
- label="Enter Groq API Key",
123
- type="password",
124
- placeholder="Paste your Groq API key here..."
125
- )
126
-
127
- start_btn = gr.Button("Start Chat")
128
- groq_state = gr.State(value="")
129
-
130
- with gr.Column(visible=False) as chatbot_container:
131
- with gr.Accordion("Overview & Summary", open=False):
132
- gr.Markdown(intro)
133
- chatbot = gr.ChatInterface(
134
- fn=chat_with_groq(vector_index, template),
135
- additional_inputs=[groq_state],
136
- chatbot=gr.Chatbot(height=500),
137
- title=chatbot_title,
138
- show_progress="full",
139
- fill_height=True,
140
- )
141
-
142
- def save_key_and_show_chat(key):
143
- if key and key.startswith("gsk_"):
144
- return key, gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=True)
145
- else:
146
- return "", gr.update(visible=True), gr.update(visible=True), gr.update(visible=True), gr.update(visible=False)
147
-
148
- start_btn.click(
149
- fn=save_key_and_show_chat,
150
- inputs=[groq_key_box],
151
- outputs=[groq_state, groq_key_box, start_btn, accordion_container, chatbot_container]
152
  )
153
 
154
- create_tab("RamayanaGPT", "๐Ÿ•‰๏ธ RamayanaGPT", ramayana_index, ramayana_qa_template, md.RamayanaGPT)
155
- create_tab("GitaGPT", "๐Ÿ•‰๏ธ GitaGPT", gita_index, gita_qa_template, md.GitaGPT)
156
 
157
  gr.HTML(md.footer.format(github_logo_encoded, linkedin_logo_encoded, website_logo_encoded))
158
 
159
  if __name__ == "__main__":
160
- demo.launch()
 
  import certifi
  from llama_index.core import VectorStoreIndex
  from llama_index.embeddings.huggingface import HuggingFaceEmbedding
+ from llama_index.llms.gemini import Gemini
  from llama_index.vector_stores.mongodb import MongoDBAtlasVectorSearch
  from llama_index.core.prompts import PromptTemplate
  from dotenv import load_dotenv
 
  # --- Embedding Model ---
  embed_model = HuggingFaceEmbedding(model_name="intfloat/multilingual-e5-base")

+ # --- Prompt Templates ---
  ramayana_qa_template = PromptTemplate(
  """You are an expert on the Valmiki Ramayana and a guide who always inspires people with the great Itihasa like the Ramayana.
 
 

  Answer:
  - Intro or general description to ```Query```
+ - Related Sanskrit shloka(s) followed by their explanation
  - Overview of ```Query```"""
  )
 
 

  Answer:
  - Intro or context about the topic
+ - Relevant Sanskrit verse(s) with explanation
  - Conclusion or reflection"""
  )
 
+ # --- MongoDB Vector Index Loader ---
  def get_vector_index(db_name, collection_name, vector_index_name):
      mongo_client = pymongo.MongoClient(
          os.getenv("ATLAS_CONNECTION_STRING"),
          tlsCAFile=certifi.where(),
      )
      mongo_client.server_info()
      print(f"✅ Connected to MongoDB Atlas for collection: {collection_name}")
 
      )
      return VectorStoreIndex.from_vector_store(vector_store, embed_model=embed_model)

+ # --- Load Indices Once ---
  ramayana_index = get_vector_index("RAG", "ramayana", "ramayana_vector_index")
  gita_index = get_vector_index("RAG", "bhagavad_gita", "gita_vector_index")
 
+ # --- Gradio Chat Wrapper with Streaming ---
+ def chat(index, template):
+     llm = Gemini(
+         model="models/gemini-1.5-flash",
+         api_key=os.getenv("GOOGLE_API_KEY"),
+         streaming=True
+     )
+     query_engine = index.as_query_engine(
+         llm=llm,
+         text_qa_template=template,
+         similarity_top_k=5,
+         streaming=True,
+         verbose=True,
+     )
+
+     def fn(message, history):
+         streaming_response = query_engine.query(message)
+
+         full_response = ""
+         for text in streaming_response.response_gen:
+             full_response += text
+             yield full_response
+         print(f"\n{datetime.now()}:: {message} --> {full_response}\n")
+     return fn
+
+ # --- Encode Logos ---
  def encode_image(image_path):
      with open(image_path, "rb") as image_file:
          return base64.b64encode(image_file.read()).decode("utf-8")
 
  website_logo_encoded = encode_image("Images/ai-logo.png")

  # --- Gradio UI ---
+ with gr.Blocks(theme=gr.themes.Soft(font=[gr.themes.GoogleFont("Roboto Mono")]), css="footer {visibility: hidden}") as demo:
      with gr.Tabs():
          with gr.TabItem("Intro"):
              gr.Markdown(md.description)

+         def create_tab(tab_title, vector_index, template, intro_md):
              with gr.TabItem(tab_title):
+                 with gr.Accordion("==========> Overview & Summary <==========", open=False):
+                     gr.Markdown(intro_md)
+                 gr.ChatInterface(
+                     fn=chat(vector_index, template),
+                     chatbot=gr.Chatbot(height=500),
+                     show_progress="full",
+                     fill_height=True,
                  )

+         create_tab("RamayanaGPT🏹", ramayana_index, ramayana_qa_template, md.RamayanaGPT)
+         create_tab("GitaGPT🛞", gita_index, gita_qa_template, md.GitaGPT)

      gr.HTML(md.footer.format(github_logo_encoded, linkedin_logo_encoded, website_logo_encoded))

  if __name__ == "__main__":
+     demo.launch()

markdown.py CHANGED
@@ -1,19 +1,19 @@
  description = """
- ## 🕉️ **Project Title: RamayanaGPT & GitaGPT – RAG-based Chatbots for Ancient Indian Epics**

  ---

  ### 🔍 **Project Overview**

- **RamayanaGPT** and **GitaGPT** are knowledge-based conversational AI tools designed to answer questions from the *Valmiki Ramayana* and the *Bhagavad Gita*, respectively. These chatbots use **Retrieval-Augmented Generation (RAG)** architecture to generate accurate, scripture-based responses. They combine powerful **vector search capabilities** with **large language models (LLMs)** to deliver spiritually insightful, context-rich conversations.

- These tools leverage:

- * **MongoDB Atlas Vector Search** for embedding-based document retrieval
- * **Hugging Face** embeddings (`intfloat/multilingual-e5-base`)
- * **Groq LLaMA 3.1 8B** via API
- * **LlamaIndex** for orchestration
- * **Gradio** for user interface

  ---

@@ -21,67 +21,66 @@ These tools leverage:

  #### 1. **Vector Store: MongoDB Atlas**

- * Two collections are created in the `RAG` database:

-   * `ramayana` for **Valmiki Ramayana**
-   * `bhagavad_gita` for **Bhagavad Gita**
- * Each collection contains vector indexes:
-
-   * `ramayana_vector_index`
-   * `gita_vector_index`
- * Each document includes:
-
-   * For Ramayana: `kanda`, `sarga`, `shloka`, `shloka_text`, and `explanation`
-   * For Gita: `Title`, `Chapter`, `Verse`, and `explanation`

  #### 2. **Vector Embedding: Hugging Face**

- * Model: `intfloat/multilingual-e5-base`
- * Used to convert `shloka_text + explanation` or `verse + explanation` into vector representations
- * These embeddings are indexed into MongoDB for semantic similarity search

- #### 3. **Language Model: Groq API**

- * LLM used: `llama-3.1-8b-instant` via **Groq API**
- * Users input their Groq API key securely
- * LLM is instantiated per query using `llama_index.llms.groq.Groq`

  #### 4. **Prompt Engineering**

- * Custom **PromptTemplates** guide the response structure for each chatbot
- * **RamayanaGPT Prompt**:

-   * Introduction to the query
-   * Related shlokas with explanations
-   * Summary/overview
- * **GitaGPT Prompt**:

-   * Context or spiritual background
-   * Relevant verse(s) with meaning
-   * Reflective conclusion

  #### 5. **Index Initialization**

- * Vector indexes are loaded **once** at application startup:

- ```python
- ramayana_index = get_vector_index("RAG", "ramayana", "ramayana_vector_index")
- gita_index = get_vector_index("RAG", "bhagavad_gita", "gita_vector_index")
- ```
- * Shared across all user queries for speed and efficiency

  #### 6. **User Interface: Gradio**

- * Built with `gr.Blocks` using the `Soft` theme and `Roboto Mono` font
- * Two tabs:

    * 🕉️ **RamayanaGPT**
    * 🕉️ **GitaGPT**
- * Users enter their Groq API key once; it's stored in `gr.State`
- * Upon authentication:

-   * API key input and help accordion are hidden
-   * Full chat interface is revealed (`gr.ChatInterface`)

  ---

@@ -89,7 +88,7 @@ These tools leverage:

  | Component | Technology |
  | --------------- | ------------------------------------- |
- | Backend LLM | Groq (LLaMA 3.1 8B via API) |
  | Embedding Model | Hugging Face (`multilingual-e5-base`) |
  | Vector Store | MongoDB Atlas Vector Search |
  | Vector Engine | LlamaIndex VectorStoreIndex |
@@ -103,16 +102,14 @@ These tools leverage:
  ### ✅ **Features Implemented**

  * [x] Vector search using MongoDB Atlas

-   * `ramayana_vector_index` for Valmiki Ramayana
-   * `gita_vector_index` for Bhagavad Gita
- * [x] Hugging Face embedding (`e5-base`) integration
- * [x] API key input and session handling with `gr.State`
- * [x] LLM integration via Groq API
- * [x] Prompt templates customized for each scripture
- * [x] Tabbed interface for seamless switching between RamayanaGPT and GitaGPT
- * [x] Clean UX with collapsible Groq API key instructions
- * [x] Logging of each query with timestamp (for debugging/monitoring)

  """

 
  description = """
+ ## 🕉️ **Project Title: Epic-Minds 🏹🛞**

  ---

  ### 🔍 **Project Overview**

+ **RamayanaGPT** and **GitaGPT** are intelligent chatbots designed to answer spiritual and literary questions from the *Valmiki Ramayana* and the *Bhagavad Gita*, respectively. They follow a **Retrieval-Augmented Generation (RAG)** pipeline to ensure that responses are contextually grounded in the scriptures.

+ These tools integrate:

+ - **MongoDB Atlas Vector Search** for semantic document retrieval
+ - **Hugging Face embeddings** (`intfloat/multilingual-e5-base`)
+ - **Gemini 1.5 Flash API** as the large language model
+ - **LlamaIndex** for orchestration and query handling
+ - **Gradio** for a clean and simple user interface
 
  ---

  #### 1. **Vector Store: MongoDB Atlas**

+ - Two collections are created in the `RAG` database:
+   - `ramayana` for **Valmiki Ramayana**
+   - `bhagavad_gita` for **Bhagavad Gita**
+ - Each collection has an associated vector index:
+   - `ramayana_vector_index`
+   - `gita_vector_index`

+ Each document includes:
+ - **Ramayana**: `kanda`, `sarga`, `shloka`, `shloka_text`, `explanation`
+ - **Gita**: `Title`, `Chapter`, `Verse`, `explanation`

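As a rough sketch of how one of these collections can be wired into LlamaIndex (this mirrors the `get_vector_index` helper in `app.py`; the `MongoDBAtlasVectorSearch` keyword names `db_name`, `collection_name`, and `vector_index_name` are assumptions and may differ across `llama-index` releases):

```python
import os

import certifi
import pymongo
from llama_index.core import VectorStoreIndex
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.vector_stores.mongodb import MongoDBAtlasVectorSearch

embed_model = HuggingFaceEmbedding(model_name="intfloat/multilingual-e5-base")

def get_vector_index(db_name, collection_name, vector_index_name):
    # Connect to the Atlas cluster; certifi supplies the CA bundle for TLS.
    mongo_client = pymongo.MongoClient(
        os.getenv("ATLAS_CONNECTION_STRING"),
        tlsCAFile=certifi.where(),
    )
    # Wrap one collection and its Atlas vector index as a LlamaIndex vector store.
    vector_store = MongoDBAtlasVectorSearch(
        mongo_client,
        db_name=db_name,                      # e.g. "RAG"
        collection_name=collection_name,      # e.g. "ramayana" or "bhagavad_gita"
        vector_index_name=vector_index_name,  # e.g. "ramayana_vector_index"
    )
    # Build an index object over the already-embedded documents in the collection.
    return VectorStoreIndex.from_vector_store(vector_store, embed_model=embed_model)
```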

  #### 2. **Vector Embedding: Hugging Face**

+ - Model used: `intfloat/multilingual-e5-base`
+ - Text format for embedding: `shloka_text + explanation` or `verse + explanation`
+ - Indexed into MongoDB for fast semantic retrieval
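A minimal sketch of how one record could be embedded with this model; the fields and the `shloka_text + explanation` concatenation follow the bullets above, while the sample values are purely illustrative:

```python
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

embed_model = HuggingFaceEmbedding(model_name="intfloat/multilingual-e5-base")

# Illustrative Ramayana record with the fields described in section 1.
doc = {
    "kanda": "Bala Kanda",
    "sarga": 1,
    "shloka": 1,
    "shloka_text": "tapaḥ svādhyāya nirataṁ tapasvī vāgvidāṁ varam ...",
    "explanation": "Valmiki asks Narada whether there is a man of ideal virtues ...",
}

# The text that gets embedded is the shloka plus its explanation.
text = f"{doc['shloka_text']} {doc['explanation']}"
vector = embed_model.get_text_embedding(text)
print(len(vector))  # 768 dimensions for multilingual-e5-base
```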
 
+ #### 3. **Language Model: Gemini 1.5 Flash**

+ - Model: `gemini-1.5-flash`
+ - Integrated via `llama_index.llms.gemini.Gemini`
+ - API key loaded from environment variables
+ - No user input required for keys, which simplifies the interface and experience
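This is essentially the construction that appears in the `app.py` diff above; a trimmed sketch, assuming `GOOGLE_API_KEY` is already set in the environment (for example via a `.env` file loaded with `python-dotenv`):

```python
import os

from llama_index.llms.gemini import Gemini

# The key is read from the environment, so users never paste anything into the UI.
llm = Gemini(
    model="models/gemini-1.5-flash",
    api_key=os.getenv("GOOGLE_API_KEY"),
)

# Quick sanity check outside the RAG pipeline.
print(llm.complete("Summarize the Bhagavad Gita in one sentence.").text)
```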
 
  #### 4. **Prompt Engineering**

+ Custom **PromptTemplates** for each chatbot guide structured, scripture-faithful responses.

+ - **RamayanaGPT Prompt**:
+   - Intro or overview of the query
+   - Related Sanskrit shloka(s) with explanation
+   - Summary of the topic

+ - **GitaGPT Prompt**:
+   - Spiritual or contextual introduction
+   - Relevant verse(s) and meaning
+   - Reflective conclusion
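A condensed sketch of such a template: the opening line and the answer structure come from the `app.py` diff, while the middle block with the standard LlamaIndex `{context_str}`/`{query_str}` placeholders is an assumption about the elided part of the prompt:

```python
from llama_index.core.prompts import PromptTemplate

ramayana_qa_template = PromptTemplate(
    """You are an expert on the Valmiki Ramayana and a guide who always inspires people with the great Itihasa like the Ramayana.

Context information from the Ramayana is below.
---------------------
{context_str}
---------------------

Given the context, answer the query.
Query: {query_str}

Answer:
- Intro or general description to the query
- Related Sanskrit shloka(s) followed by their explanation
- Overview of the query"""
)
```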
 
  #### 5. **Index Initialization**

+ - Vector indices are initialized **once** at app startup:

+ ```python
+ ramayana_index = get_vector_index("RAG", "ramayana", "ramayana_vector_index")
+ gita_index = get_vector_index("RAG", "bhagavad_gita", "gita_vector_index")
+ ```
+
+ - Shared across sessions for efficiency
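Reusing the shared index then just means building a query engine on top of it, which is what the `chat()` wrapper in the `app.py` diff does; a trimmed sketch, assuming the `ramayana_index` and `ramayana_qa_template` names from the snippets above and a `GOOGLE_API_KEY` in the environment:

```python
import os

from llama_index.llms.gemini import Gemini

llm = Gemini(model="models/gemini-1.5-flash", api_key=os.getenv("GOOGLE_API_KEY"))

# One query engine per chat tab, built over the index loaded at startup.
query_engine = ramayana_index.as_query_engine(
    llm=llm,
    text_qa_template=ramayana_qa_template,
    similarity_top_k=5,   # retrieve the five most similar shlokas
    streaming=True,       # expose response_gen so the Gradio UI can stream tokens
)

streaming_response = query_engine.query("Who were the sons of Dasharatha?")
for token in streaming_response.response_gen:
    print(token, end="", flush=True)
```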
 
  #### 6. **User Interface: Gradio**

+ * Built using `gr.Blocks` with the `Soft` theme and `Roboto Mono` font
+ * Clean tabbed interface:

    * 🕉️ **RamayanaGPT**
    * 🕉️ **GitaGPT**
+ * Each tab features:
+   * Overview accordion
+   * Chat window powered by `gr.ChatInterface`
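A stripped-down sketch of this layout; a placeholder echo generator stands in for the real `chat(index, template)` function, and the Gradio calls mirror the ones in the `app.py` diff:

```python
import gradio as gr

def make_chat_fn(name):
    # Stand-in for chat(index, template): a generator, so gr.ChatInterface streams the reply.
    def fn(message, history):
        reply = f"[{name}] You asked: {message}"
        partial = ""
        for ch in reply:
            partial += ch
            yield partial
    return fn

with gr.Blocks(theme=gr.themes.Soft(font=[gr.themes.GoogleFont("Roboto Mono")])) as demo:
    with gr.Tabs():
        for tab_name in ("RamayanaGPT🏹", "GitaGPT🛞"):
            with gr.TabItem(tab_name):
                with gr.Accordion("Overview & Summary", open=False):
                    gr.Markdown(f"About {tab_name} ...")
                gr.ChatInterface(fn=make_chat_fn(tab_name), chatbot=gr.Chatbot(height=500))

if __name__ == "__main__":
    demo.launch()
```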
 
  ---

  | Component | Technology |
  | --------------- | ------------------------------------- |
+ | Backend LLM | Gemini 1.5 Flash (via API) |
  | Embedding Model | Hugging Face (`multilingual-e5-base`) |
  | Vector Store | MongoDB Atlas Vector Search |
  | Vector Engine | LlamaIndex VectorStoreIndex |
 
  ### ✅ **Features Implemented**

  * [x] Vector search using MongoDB Atlas
+ * [x] Hugging Face embedding integration
+ * [x] Gemini 1.5 Flash LLM integration
+ * [x] Structured prompts per scripture
+ * [x] Tabbed Gradio UI for easy switching
+ * [x] Collapsible summaries for each section
+ * [x] Query logging with timestamp

+ ---

  """