Spaces:
Sleeping
Sleeping
Upload 2 files
Browse files- app.py +45 -67
- markdown.py +54 -57
app.py
CHANGED
@@ -3,7 +3,7 @@ import pymongo
|
|
3 |
import certifi
|
4 |
from llama_index.core import VectorStoreIndex
|
5 |
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
|
6 |
-
from llama_index.llms.
|
7 |
from llama_index.vector_stores.mongodb import MongoDBAtlasVectorSearch
|
8 |
from llama_index.core.prompts import PromptTemplate
|
9 |
from dotenv import load_dotenv
|
@@ -18,7 +18,7 @@ load_dotenv()
|
|
18 |
# --- Embedding Model ---
|
19 |
embed_model = HuggingFaceEmbedding(model_name="intfloat/multilingual-e5-base")
|
20 |
|
21 |
-
# --- Prompt
|
22 |
ramayana_qa_template = PromptTemplate(
|
23 |
"""You are an expert on the Valmiki Ramayana and a guide who always inspires people with the great Itihasa like the Ramayana.
|
24 |
|
@@ -33,7 +33,7 @@ ramayana_qa_template = PromptTemplate(
|
|
33 |
|
34 |
Answer:
|
35 |
- Intro or general description to ```Query```
|
36 |
-
- Related shloka
|
37 |
- Overview of ```Query```"""
|
38 |
)
|
39 |
|
@@ -51,18 +51,15 @@ gita_qa_template = PromptTemplate(
|
|
51 |
|
52 |
Answer:
|
53 |
- Intro or context about the topic
|
54 |
-
- Relevant verse(s) with explanation
|
55 |
- Conclusion or reflection"""
|
56 |
)
|
57 |
|
58 |
-
# ---
|
59 |
def get_vector_index(db_name, collection_name, vector_index_name):
|
60 |
mongo_client = pymongo.MongoClient(
|
61 |
os.getenv("ATLAS_CONNECTION_STRING"),
|
62 |
tlsCAFile=certifi.where(),
|
63 |
-
tlsAllowInvalidCertificates=False,
|
64 |
-
connectTimeoutMS=30000,
|
65 |
-
serverSelectionTimeoutMS=30000,
|
66 |
)
|
67 |
mongo_client.server_info()
|
68 |
print(f"โ
Connected to MongoDB Atlas for collection: {collection_name}")
|
@@ -75,29 +72,38 @@ def get_vector_index(db_name, collection_name, vector_index_name):
|
|
75 |
)
|
76 |
return VectorStoreIndex.from_vector_store(vector_store, embed_model=embed_model)
|
77 |
|
78 |
-
# ---
|
79 |
-
def chat_with_groq(index, template):
|
80 |
-
def fn(message, history, groq_key):
|
81 |
-
if not groq_key or not groq_key.startswith("gsk_"):
|
82 |
-
return "โ Invalid Groq API Key. Please enter a valid key."
|
83 |
-
llm = Groq(model="llama-3.1-8b-instant", api_key=groq_key)
|
84 |
-
query_engine = index.as_query_engine(
|
85 |
-
llm=llm,
|
86 |
-
text_qa_template=template,
|
87 |
-
similarity_top_k=5,
|
88 |
-
verbose=True,
|
89 |
-
)
|
90 |
-
response = query_engine.query(message)
|
91 |
-
print(f"\n{datetime.now()}:: {message} --> {str(response)}\n")
|
92 |
-
return str(response)
|
93 |
-
return fn
|
94 |
-
|
95 |
-
# Load vector indices once
|
96 |
ramayana_index = get_vector_index("RAG", "ramayana", "ramayana_vector_index")
|
97 |
gita_index = get_vector_index("RAG", "bhagavad_gita", "gita_vector_index")
|
98 |
|
99 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
100 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
101 |
def encode_image(image_path):
|
102 |
with open(image_path, "rb") as image_file:
|
103 |
return base64.b64encode(image_file.read()).decode("utf-8")
|
@@ -107,54 +113,26 @@ linkedin_logo_encoded = encode_image("Images/linkedin-logo.png")
|
|
107 |
website_logo_encoded = encode_image("Images/ai-logo.png")
|
108 |
|
109 |
# --- Gradio UI ---
|
110 |
-
with gr.Blocks(theme=gr.themes.Soft(font=[gr.themes.GoogleFont("Roboto Mono")]), css=
|
111 |
with gr.Tabs():
|
112 |
with gr.TabItem("Intro"):
|
113 |
gr.Markdown(md.description)
|
114 |
|
115 |
-
def create_tab(tab_title,
|
116 |
with gr.TabItem(tab_title):
|
117 |
-
with gr.
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
placeholder="Paste your Groq API key here..."
|
125 |
-
)
|
126 |
-
|
127 |
-
start_btn = gr.Button("Start Chat")
|
128 |
-
groq_state = gr.State(value="")
|
129 |
-
|
130 |
-
with gr.Column(visible=False) as chatbot_container:
|
131 |
-
with gr.Accordion("Overview & Summary", open=False):
|
132 |
-
gr.Markdown(intro)
|
133 |
-
chatbot = gr.ChatInterface(
|
134 |
-
fn=chat_with_groq(vector_index, template),
|
135 |
-
additional_inputs=[groq_state],
|
136 |
-
chatbot=gr.Chatbot(height=500),
|
137 |
-
title=chatbot_title,
|
138 |
-
show_progress="full",
|
139 |
-
fill_height=True,
|
140 |
-
)
|
141 |
-
|
142 |
-
def save_key_and_show_chat(key):
|
143 |
-
if key and key.startswith("gsk_"):
|
144 |
-
return key, gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=True)
|
145 |
-
else:
|
146 |
-
return "", gr.update(visible=True), gr.update(visible=True), gr.update(visible=True), gr.update(visible=False)
|
147 |
-
|
148 |
-
start_btn.click(
|
149 |
-
fn=save_key_and_show_chat,
|
150 |
-
inputs=[groq_key_box],
|
151 |
-
outputs=[groq_state, groq_key_box, start_btn, accordion_container, chatbot_container]
|
152 |
)
|
153 |
|
154 |
-
create_tab("RamayanaGPT",
|
155 |
-
create_tab("GitaGPT",
|
156 |
|
157 |
gr.HTML(md.footer.format(github_logo_encoded, linkedin_logo_encoded, website_logo_encoded))
|
158 |
|
159 |
if __name__ == "__main__":
|
160 |
-
demo.launch()
|
|
|
3 |
import certifi
|
4 |
from llama_index.core import VectorStoreIndex
|
5 |
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
|
6 |
+
from llama_index.llms.gemini import Gemini
|
7 |
from llama_index.vector_stores.mongodb import MongoDBAtlasVectorSearch
|
8 |
from llama_index.core.prompts import PromptTemplate
|
9 |
from dotenv import load_dotenv
|
|
|
18 |
# --- Embedding Model ---
|
19 |
embed_model = HuggingFaceEmbedding(model_name="intfloat/multilingual-e5-base")
|
20 |
|
21 |
+
# --- Prompt Templates ---
|
22 |
ramayana_qa_template = PromptTemplate(
|
23 |
"""You are an expert on the Valmiki Ramayana and a guide who always inspires people with the great Itihasa like the Ramayana.
|
24 |
|
|
|
33 |
|
34 |
Answer:
|
35 |
- Intro or general description to ```Query```
|
36 |
+
- Related sanskrit shloka(s) followed by its explanation
|
37 |
- Overview of ```Query```"""
|
38 |
)
|
39 |
|
|
|
51 |
|
52 |
Answer:
|
53 |
- Intro or context about the topic
|
54 |
+
- Relevant sanskrit verse(s) with explanation
|
55 |
- Conclusion or reflection"""
|
56 |
)
|
57 |
|
58 |
+
# --- MongoDB Vector Index Loader ---
|
59 |
def get_vector_index(db_name, collection_name, vector_index_name):
|
60 |
mongo_client = pymongo.MongoClient(
|
61 |
os.getenv("ATLAS_CONNECTION_STRING"),
|
62 |
tlsCAFile=certifi.where(),
|
|
|
|
|
|
|
63 |
)
|
64 |
mongo_client.server_info()
|
65 |
print(f"โ
Connected to MongoDB Atlas for collection: {collection_name}")
|
|
|
72 |
)
|
73 |
return VectorStoreIndex.from_vector_store(vector_store, embed_model=embed_model)
|
74 |
|
75 |
+
# --- Load Indices Once ---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
76 |
ramayana_index = get_vector_index("RAG", "ramayana", "ramayana_vector_index")
|
77 |
gita_index = get_vector_index("RAG", "bhagavad_gita", "gita_vector_index")
|
78 |
|
79 |
+
# --- Gradio Chat Wrapper with Streaming ---
|
80 |
+
def chat(index, template):
    """Build a streaming chat callback backed by a Gemini query engine.

    The LLM client and the query engine are created once, when this factory
    is called, and are captured by the returned closure so that every chat
    turn reuses them.

    Args:
        index: Object exposing ``as_query_engine`` (in this file, the value
            returned by ``get_vector_index``).
        template: Prompt template forwarded as ``text_qa_template``.

    Returns:
        A generator function ``fn(message, history)`` that yields the
        accumulated answer text after every streamed chunk.
    """
    llm = Gemini(
        model="models/gemini-1.5-flash",
        api_key=os.getenv("GOOGLE_API_KEY"),
        streaming=True,
    )
    query_engine = index.as_query_engine(
        llm=llm,
        text_qa_template=template,
        similarity_top_k=5,
        streaming=True,
        verbose=True,
    )

    def fn(message, history):
        # `history` is part of the callback signature but is not used:
        # each question is answered independently of earlier turns.
        streaming_response = query_engine.query(message)

        full_response = ""
        for text in streaming_response.response_gen:
            full_response += text
            # Yield the text accumulated so far, not just the new chunk.
            yield full_response

        # The previous version issued a second `query_engine.query(message)`
        # here and yielded its result, which repeated the retrieval and the
        # LLM call and replaced the answer that had just been streamed.
        # The streamed text is already the complete answer, so only log it.
        print(f"\n{datetime.now()}:: {message} --> {full_response}\n")

    return fn
|
105 |
+
|
106 |
+
# --- Encode Logos ---
|
107 |
def encode_image(image_path):
    """Return the bytes of the file at ``image_path`` as a Base64 text string."""
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode("utf-8")
|
|
|
113 |
website_logo_encoded = encode_image("Images/ai-logo.png")
|
114 |
|
115 |
# --- Gradio UI ---
|
116 |
+
with gr.Blocks(theme=gr.themes.Soft(font=[gr.themes.GoogleFont("Roboto Mono")]), css="footer {visibility: hidden}") as demo:
|
117 |
with gr.Tabs():
|
118 |
with gr.TabItem("Intro"):
|
119 |
gr.Markdown(md.description)
|
120 |
|
121 |
+
def create_tab(tab_title, vector_index, template, intro_md):
    """Add one tab holding a collapsed overview accordion and a chat interface.

    Args:
        tab_title: Label shown on the tab.
        vector_index: Index handed to ``chat`` to build the chat callback.
        template: Prompt template handed to ``chat``.
        intro_md: Markdown text rendered inside the overview accordion.
    """
    with gr.TabItem(tab_title):
        overview = gr.Accordion("==========> Overview & Summary <==========", open=False)
        with overview:
            gr.Markdown(intro_md)

        # Build the callback first and the chat window second, matching the
        # order in which the original call evaluated its arguments.
        respond = chat(vector_index, template)
        chat_window = gr.Chatbot(height=500)
        gr.ChatInterface(
            fn=respond,
            chatbot=chat_window,
            show_progress="full",
            fill_height=True,
        )
|
131 |
|
132 |
+
create_tab("RamayanaGPT๐น", ramayana_index, ramayana_qa_template, md.RamayanaGPT)
|
133 |
+
create_tab("GitaGPT๐", gita_index, gita_qa_template, md.GitaGPT)
|
134 |
|
135 |
gr.HTML(md.footer.format(github_logo_encoded, linkedin_logo_encoded, website_logo_encoded))
|
136 |
|
137 |
if __name__ == "__main__":
|
138 |
+
demo.launch()
|
markdown.py
CHANGED
@@ -1,19 +1,19 @@
|
|
1 |
description = """
|
2 |
-
## ๐๏ธ **Project Title:
|
3 |
|
4 |
---
|
5 |
|
6 |
### ๐ **Project Overview**
|
7 |
|
8 |
-
**RamayanaGPT** and **GitaGPT** are
|
9 |
|
10 |
-
These tools
|
11 |
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
|
18 |
---
|
19 |
|
@@ -21,67 +21,66 @@ These tools leverage:
|
|
21 |
|
22 |
#### 1. **Vector Store: MongoDB Atlas**
|
23 |
|
24 |
-
|
|
|
|
|
|
|
|
|
|
|
25 |
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
* `ramayana_vector_index`
|
31 |
-
* `gita_vector_index`
|
32 |
-
* Each document includes:
|
33 |
-
|
34 |
-
* For Ramayana: `kanda`, `sarga`, `shloka`, `shloka_text`, and `explanation`
|
35 |
-
* For Gita: `Title`, `Chapter`, `Verse`, and `explanation`
|
36 |
|
37 |
#### 2. **Vector Embedding: Hugging Face**
|
38 |
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
|
43 |
-
#### 3. **Language Model:
|
44 |
|
45 |
-
|
46 |
-
|
47 |
-
|
|
|
48 |
|
49 |
#### 4. **Prompt Engineering**
|
50 |
|
51 |
-
|
52 |
-
* **RamayanaGPT Prompt**:
|
53 |
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
|
59 |
-
|
60 |
-
|
61 |
-
|
|
|
62 |
|
63 |
#### 5. **Index Initialization**
|
64 |
|
65 |
-
|
66 |
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
|
|
72 |
|
73 |
#### 6. **User Interface: Gradio**
|
74 |
|
75 |
-
* Built
|
76 |
-
*
|
77 |
|
78 |
* ๐๏ธ **RamayanaGPT**
|
79 |
* ๐๏ธ **GitaGPT**
|
80 |
-
*
|
81 |
-
* Upon authentication:
|
82 |
|
83 |
-
*
|
84 |
-
*
|
85 |
|
86 |
---
|
87 |
|
@@ -89,7 +88,7 @@ These tools leverage:
|
|
89 |
|
90 |
| Component | Technology |
|
91 |
| --------------- | ------------------------------------- |
|
92 |
-
| Backend LLM |
|
93 |
| Embedding Model | Hugging Face (`multilingual-e5-base`) |
|
94 |
| Vector Store | MongoDB Atlas Vector Search |
|
95 |
| Vector Engine | LlamaIndex VectorStoreIndex |
|
@@ -103,16 +102,14 @@ These tools leverage:
|
|
103 |
### โ
**Features Implemented**
|
104 |
|
105 |
* [x] Vector search using MongoDB Atlas
|
|
|
|
|
|
|
|
|
|
|
|
|
106 |
|
107 |
-
|
108 |
-
* `gita_vector_index` for Bhagavad Gita
|
109 |
-
* [x] Hugging Face embedding (`e5-base`) integration
|
110 |
-
* [x] API key input and session handling with `gr.State`
|
111 |
-
* [x] LLM integration via Groq API
|
112 |
-
* [x] Prompt templates customized for each scripture
|
113 |
-
* [x] Tabbed interface for seamless switching between RamayanaGPT and GitaGPT
|
114 |
-
* [x] Clean UX with collapsible Groq API key instructions
|
115 |
-
* [x] Logging of each query with timestamp (for debugging/monitoring)
|
116 |
|
117 |
"""
|
118 |
|
|
|
1 |
description = """
|
2 |
+
## ๐๏ธ **Project Title: Epic-Minds ๐น๐**
|
3 |
|
4 |
---
|
5 |
|
6 |
### ๐ **Project Overview**
|
7 |
|
8 |
+
**RamayanaGPT** and **GitaGPT** are intelligent chatbots designed to answer spiritual and literary questions from the *Valmiki Ramayana* and the *Bhagavad Gita* respectively. They follow a **Retrieval-Augmented Generation (RAG)** pipeline to ensure that the responses are contextually grounded in the scriptures.
|
9 |
|
10 |
+
These tools integrate:
|
11 |
|
12 |
+
- **MongoDB Atlas Vector Search** for semantic document retrieval
|
13 |
+
- **Hugging Face embeddings** (`intfloat/multilingual-e5-base`)
|
14 |
+
- **Gemini Flash 1.5 API** as the large language model
|
15 |
+
- **LlamaIndex** for orchestration and query handling
|
16 |
+
- **Gradio** for a clean and simple user interface
|
17 |
|
18 |
---
|
19 |
|
|
|
21 |
|
22 |
#### 1. **Vector Store: MongoDB Atlas**
|
23 |
|
24 |
+
- Two collections are created in the `RAG` database:
|
25 |
+
- `ramayana` for **Valmiki Ramayana**
|
26 |
+
- `bhagavad_gita` for **Bhagavad Gita**
|
27 |
+
- Each collection has an associated vector index:
|
28 |
+
- `ramayana_vector_index`
|
29 |
+
- `gita_vector_index`
|
30 |
|
31 |
+
Each document includes:
|
32 |
+
- **Ramayana**: `kanda`, `sarga`, `shloka`, `shloka_text`, `explanation`
|
33 |
+
- **Gita**: `Title`, `Chapter`, `Verse`, `explanation`
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
34 |
|
35 |
#### 2. **Vector Embedding: Hugging Face**
|
36 |
|
37 |
+
- Model used: `intfloat/multilingual-e5-base`
|
38 |
+
- Text format for embedding: `shloka_text + explanation` or `verse + explanation`
|
39 |
+
- Indexed into MongoDB for fast semantic retrieval
|
40 |
|
41 |
+
#### 3. **Language Model: Gemini Flash**
|
42 |
|
43 |
+
- Model: `gemini-1.5-flash`
|
44 |
+
- Integrated via `llama_index.llms.gemini.Gemini`
|
45 |
+
- API key loaded from environment variables
|
46 |
+
- No user input required for keys—simplifies interface and experience
|
47 |
|
48 |
#### 4. **Prompt Engineering**
|
49 |
|
50 |
+
Custom **PromptTemplates** for each chatbot to guide structured, scripture-faithful responses.
|
|
|
51 |
|
52 |
+
- **RamayanaGPT Prompt**:
|
53 |
+
- Intro or overview of the query
|
54 |
+
- Related Sanskrit shloka(s) with explanation
|
55 |
+
- Summary of the topic
|
56 |
|
57 |
+
- **GitaGPT Prompt**:
|
58 |
+
- Spiritual or contextual introduction
|
59 |
+
- Relevant verse(s) and meaning
|
60 |
+
- Reflective conclusion
|
61 |
|
62 |
#### 5. **Index Initialization**
|
63 |
|
64 |
+
- Vector indices are initialized **once** at app startup:
|
65 |
|
66 |
+
```python
|
67 |
+
ramayana_index = get_vector_index("RAG", "ramayana", "ramayana_vector_index")
|
68 |
+
gita_index = get_vector_index("RAG", "bhagavad_gita", "gita_vector_index")
|
69 |
+
````
|
70 |
+
|
71 |
+
* Shared across sessions for efficiency
|
72 |
|
73 |
#### 6. **User Interface: Gradio**
|
74 |
|
75 |
+
* Built using `gr.Blocks` with the `Soft` theme and `Roboto Mono` font
|
76 |
+
* Clean tabbed interface:
|
77 |
|
78 |
* ๐๏ธ **RamayanaGPT**
|
79 |
* ๐๏ธ **GitaGPT**
|
80 |
+
* Each tab features:
|
|
|
81 |
|
82 |
+
* Overview accordion
|
83 |
+
* Chat window powered by `gr.ChatInterface`
|
84 |
|
85 |
---
|
86 |
|
|
|
88 |
|
89 |
| Component | Technology |
|
90 |
| --------------- | ------------------------------------- |
|
91 |
+
| Backend LLM | Gemini Flash 1.5 (via API) |
|
92 |
| Embedding Model | Hugging Face (`multilingual-e5-base`) |
|
93 |
| Vector Store | MongoDB Atlas Vector Search |
|
94 |
| Vector Engine | LlamaIndex VectorStoreIndex |
|
|
|
102 |
### ✅ **Features Implemented**
|
103 |
|
104 |
* [x] Vector search using MongoDB Atlas
|
105 |
+
* [x] Hugging Face embedding integration
|
106 |
+
* [x] Gemini Flash 1.5 LLM integration
|
107 |
+
* [x] Structured prompts per scripture
|
108 |
+
* [x] Tabbed Gradio UI for easy switching
|
109 |
+
* [x] Collapsible summaries for each section
|
110 |
+
* [x] Query logging with timestamp
|
111 |
|
112 |
+
---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
113 |
|
114 |
"""
|
115 |
|