# SciLinguaBot_9 / app.py
# RAMYASRI-39's picture
# Update app.py
# f3c5e9a verified
# raw
# history blame
# 25.7 kB
import gradio as gr
from phi.agent import Agent
from phi.model.groq import Groq
import os
import logging
from sentence_transformers import CrossEncoder
from backend.semantic_search import table, retriever
import numpy as np
from time import perf_counter
import requests
# Logging: INFO-level configuration plus a logger named after this module.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Groq credentials are read from the GROQ_API_KEY environment variable.
api_key = os.getenv("GROQ_API_KEY")
if api_key:
    os.environ["GROQ_API_KEY"] = api_key
else:
    gr.Warning("GROQ_API_KEY not found. Set it in 'Repository secrets'.")
    logger.error("GROQ_API_KEY not found.")
    # Keep going with an empty key; calls that need the key are expected to fail.
    api_key = ""

# Bhashini credentials, sent as request headers by bhashini_translate.
bhashini_api_key = os.getenv("API_KEY")
bhashini_user_id = os.getenv("USER_ID")
def bhashini_translate(text: str, from_code: str = "en", to_code: str = "hi", timeout: float = 30.0) -> dict:
    """Translate ``text`` from ``from_code`` to ``to_code`` using the Bhashini API.

    Two HTTP requests are made. The first asks the pipeline endpoint for the
    translation service id, the callback URL and the inference auth header.
    The second posts the text to that callback URL.

    Args:
        text: Text to translate. ``None``, empty and whitespace-only input is
            rejected without any network call.
        from_code: Language code sent as ``sourceLanguage``.
        to_code: Language code sent as ``targetLanguage``.
        timeout: Seconds to wait on each HTTP request before giving up.

    Returns:
        A dict with the keys ``status_code``, ``message`` and
        ``translated_content``. ``translated_content`` is ``None`` whenever
        the translation did not succeed; this function reports failures
        through the dict rather than by raising on network or parsing errors.
    """

    def failure(status_code, message):
        # Uniform failure payload so callers can always read the same three keys.
        return {"status_code": status_code, "message": message, "translated_content": None}

    if not text or not text.strip():
        print('Input text is empty. Please provide valid text for translation.')
        return failure(400, "Input text is empty")

    print('Input text - ', text)
    print(f'Starting translation process from {from_code} to {to_code}...')
    gr.Warning(f'Translating to {to_code}...')

    language = {"sourceLanguage": from_code, "targetLanguage": to_code}
    url = 'https://meity-auth.ulcacontrib.org/ulca/apis/v0/model/getModelsPipeline'
    headers = {
        "Content-Type": "application/json",
        "userID": bhashini_user_id,
        "ulcaApiKey": bhashini_api_key,
    }
    payload = {
        "pipelineTasks": [{"taskType": "translation", "config": {"language": language}}],
        "pipelineRequestConfig": {"pipelineId": "64392f96daac500b55c543cd"},
    }

    print('Sending initial request to get the pipeline...')
    try:
        response = requests.post(url, json=payload, headers=headers, timeout=timeout)
    except requests.RequestException as exc:
        # Connection problems and timeouts become a normal failure result.
        print(f'Network error in initial request: {exc}')
        return failure(503, "Error in translation request")
    if response.status_code != 200:
        print(f'Error in initial request: {response.status_code}, Response: {response.text}')
        return failure(response.status_code, "Error in translation request")

    print('Initial request successful, processing response...')
    try:
        response_data = response.json()
        endpoint = response_data["pipelineInferenceAPIEndPoint"]
        callback_url = endpoint["callbackUrl"]
        service_id = response_data["pipelineResponseConfig"][0]["config"][0]["serviceId"]
        inference_key = endpoint["inferenceApiKey"]
        headers2 = {
            "Content-Type": "application/json",
            inference_key["name"]: inference_key["value"],
        }
    except (ValueError, KeyError, IndexError, TypeError) as exc:
        # The full response is deliberately not printed: it carries the inference API key.
        print(f'Unexpected response structure: {exc!r}')
        return failure(400, "Unexpected API response structure")
    print(f'Service ID: {service_id}, Callback URL: {callback_url}')

    compute_payload = {
        "pipelineTasks": [{"taskType": "translation", "config": {"language": language, "serviceId": service_id}}],
        "inputData": {"input": [{"source": text}], "audio": [{"audioContent": None}]},
    }

    print(f'Sending translation request with text: "{text}"')
    try:
        compute_response = requests.post(callback_url, json=compute_payload, headers=headers2, timeout=timeout)
    except requests.RequestException as exc:
        print(f'Network error in translation request: {exc}')
        return failure(503, "Error in translation")
    if compute_response.status_code != 200:
        print(f'Error in translation request: {compute_response.status_code}, Response: {compute_response.text}')
        return failure(compute_response.status_code, "Error in translation")

    print('Translation request successful, processing translation...')
    try:
        translated_content = compute_response.json()["pipelineResponse"][0]["output"][0]["target"]
    except (ValueError, KeyError, IndexError, TypeError) as exc:
        print(f'Unexpected translation response structure: {exc!r}')
        return failure(502, "Unexpected API response structure")

    print(f'Translation successful. Translated content: "{translated_content}"')
    return {"status_code": 200, "message": "Translation successful", "translated_content": translated_content}
# Instruction lines handed to the agent; kept in a named list so the
# constructor call below stays short.
_TUTOR_INSTRUCTIONS = [
    "You are an expert science teacher specializing in 10th-grade curriculum.",
    "Provide clear, accurate, and age-appropriate explanations.",
    "Use simple language and examples that students can understand.",
    "Focus on concepts from physics, chemistry, and biology.",
    "Structure responses with headings and bullet points when helpful.",
    "Encourage learning and curiosity.",
]

# Groq model used by the agent, authenticated with the module-level api_key.
# NOTE(review): confirm the "llama3-70b-8192" model id is still served by Groq.
_tutor_model = Groq(id="llama3-70b-8192", api_key=api_key)

# The PhiData agent that answers every chat question.
agent = Agent(
    name="Science Education Assistant",
    role="You are a helpful science tutor for 10th-grade students",
    instructions=_TUTOR_INSTRUCTIONS,
    model=_tutor_model,
    markdown=True,
)
# Response Generation Function

# Rerankers already loaded in this process, keyed by model name, so a model is
# constructed once instead of on every question.
_CROSS_ENCODERS = {}


def _get_cross_encoder(cross_encoder_choice):
    """Return the reranker for the UI choice, loading it only on first use."""
    if cross_encoder_choice == '(ACCURATE) BGE reranker':
        model_name = 'BAAI/bge-reranker-base'
    else:
        # Every other choice falls back to the MiniLM reranker.
        model_name = 'cross-encoder/ms-marco-MiniLM-L-6-v2'
    if model_name not in _CROSS_ENCODERS:
        _CROSS_ENCODERS[model_name] = CrossEncoder(model_name)
    return _CROSS_ENCODERS[model_name]


def retrieve_and_generate_response(query, cross_encoder_choice, history=None):
    """Answer ``query`` using retrieved documents, a reranker and the agent.

    Args:
        query: The user's question. ``None``, empty or whitespace-only input
            returns a fixed prompt asking for a valid question.
        cross_encoder_choice: Reranker label from the UI.
            '(ACCURATE) BGE reranker' selects the BGE model; anything else
            selects the MiniLM model.
        history: Optional list of ``(user_message, bot_message)`` pairs; the
            last two complete exchanges are prepended to the prompt.

    Returns:
        The agent's answer as a string, or an "Error generating response: ..."
        string if any step raised.
    """
    top_rerank = 25    # candidates fetched from the vector table
    top_k_rank = 20    # candidates kept after reranking
    context_docs = 10  # documents actually placed in the prompt

    if not query or not query.strip():
        return "Please provide a valid question."

    try:
        start_time = perf_counter()

        # Encode query and search documents
        query_vec = retriever.encode(query)
        rows = table.search(query_vec, vector_column_name="vector").limit(top_rerank).to_list()
        documents = [row["text"] for row in rows]

        # Re-rank documents using the cross-encoder (nothing to score when the search is empty)
        if documents:
            reranker = _get_cross_encoder(cross_encoder_choice)
            cross_scores = reranker.predict([[query, doc] for doc in documents])
            best_first = np.argsort(cross_scores)[::-1][:top_k_rank]
            documents = [documents[idx] for idx in best_first]

        # Create context from top documents
        context = "\n\n".join(documents[:context_docs])
        context = f"Context information from educational materials:\n{context}\n\n"

        # Add the last two complete exchanges as conversation context
        history_context = "".join(
            f"Previous Q: {user_msg}\nPrevious A: {bot_msg}\n"
            for user_msg, bot_msg in (history or [])[-2:]
            if user_msg and bot_msg
        )

        # Create full prompt
        full_prompt = f"{history_context}{context}Question: {query}\n\nPlease answer the question using the context provided above. If the context doesn't contain relevant information, use your general knowledge about 10th-grade science topics."

        # Generate response
        response = agent.run(full_prompt)
        response_text = response.content if hasattr(response, 'content') else str(response)
        logger.info(f"Response generation took {perf_counter() - start_time:.2f} seconds")
        return response_text
    except Exception as e:
        # UI boundary: log the traceback and show the error in the chat instead of crashing.
        logger.exception("Error in response generation")
        return f"Error generating response: {str(e)}"
def simple_chat_function(message, history, cross_encoder_choice):
    """Answer one chat message and append the exchange to ``history``.

    Args:
        message: The user's message. ``None``, empty or whitespace-only input
            is ignored.
        history: List of ``[user_message, bot_message]`` pairs; ``None`` is
            treated as an empty conversation. The list is extended in place.
        cross_encoder_choice: Reranker label passed through to
            ``retrieve_and_generate_response``.

    Returns:
        A ``("", history)`` tuple: the empty string clears the input box and
        ``history`` is the updated conversation.
    """
    history = [] if history is None else history
    if not message or not message.strip():
        return "", history

    # Generate response using the semantic search function
    response = retrieve_and_generate_response(message, cross_encoder_choice, history)

    # Add to history
    history.append([message, response])
    return "", history
def translate_text(selected_language, history):
    """Translate the last bot response in ``history`` to ``selected_language``.

    Args:
        selected_language: Language name as shown in the UI dropdown
            (for example "Hindi").
        history: List of ``[user_message, bot_message]`` pairs.

    Returns:
        The translated text; ``""`` when there is no response to translate;
        ``'Translation failed.'`` when the language is not supported or the
        translation service did not return a translation.
    """
    iso_language_codes = {
        "Hindi": "hi", "Gom": "gom", "Kannada": "kn", "Dogri": "doi", "Bodo": "brx", "Urdu": "ur",
        "Tamil": "ta", "Kashmiri": "ks", "Assamese": "as", "Bengali": "bn", "Marathi": "mr",
        "Sindhi": "sd", "Maithili": "mai", "Punjabi": "pa", "Malayalam": "ml", "Manipuri": "mni",
        "Telugu": "te", "Sanskrit": "sa", "Nepali": "ne", "Santali": "sat", "Gujarati": "gu", "Odia": "or"
    }
    to_code = iso_language_codes.get(selected_language)
    if to_code is None:
        print('Unsupported translation language:', selected_language)
        return 'Translation failed.'

    response_text = history[-1][1] if history and history[-1][1] else ''
    print('response_text for translation', response_text)
    if not response_text.strip():
        # Nothing to translate, so skip the API round trip.
        return ''

    try:
        translation = bhashini_translate(response_text, to_code=to_code)
    except (requests.RequestException, KeyError, IndexError, TypeError, ValueError) as exc:
        print('Translation error:', exc)
        return 'Translation failed.'

    # The key is always present but holds None on failure, so a .get() default
    # would never apply; fall back explicitly instead.
    return translation.get('translated_content') or 'Translation failed.'
# Gradio Interface with layout template
with gr.Blocks(title="Science Chatbot", theme='gradio/soft') as demo:
    # Header section
    with gr.Row():
        with gr.Column(scale=10):
            gr.HTML(value="""<div style="color: #FF4500;"><h1>Welcome! I am your friend!</h1>Ask me !I will help you<h1><span style="color: #008000">I AM A CHATBOT FOR 10TH SCIENCE WITH TRANSLATION IN 22 LANGUAGES</span></h1></div>""")
            gr.HTML(value="""<p style="font-family: sans-serif; font-size: 16px;">A free chat bot developed by K.M.RAMYASRI,TGT,GHS.SUTHUKENY using Open source LLMs for 10 std students</p>""")
            gr.HTML(value="""<p style="font-family: Arial, sans-serif; font-size: 14px;"> Suggestions may be sent to <a href="mailto:[email protected]" style="color: #00008B; font-style: italic;">[email protected]</a>.</p>""")
        with gr.Column(scale=3):
            try:
                gr.Image(value='logo.png', height=200, width=200)
            except Exception:
                # Show a placeholder box if the logo cannot be loaded. A bare
                # ``except:`` would also swallow KeyboardInterrupt/SystemExit.
                logger.warning("Could not load logo.png; showing a placeholder instead.")
                gr.HTML("<div style='height: 200px; width: 200px; background-color: #f0f0f0; display: flex; align-items: center; justify-content: center;'>Logo</div>")

    # Chat and input components
    chatbot = gr.Chatbot(
        [],
        elem_id="chatbot",
        avatar_images=('https://aui.atlassian.com/aui/8.8/docs/images/avatar-person.svg',
                       'https://huggingface.co/datasets/huggingface/brand-assets/resolve/main/hf-logo.svg'),
        bubble_full_width=False,
        show_copy_button=True,
        show_share_button=True,
    )
    with gr.Row():
        msg = gr.Textbox(
            scale=3,
            show_label=False,
            placeholder="Enter text and press enter",
            container=False,
        )
        submit_btn = gr.Button(value="Submit text", scale=1, variant="primary")

    # Additional controls
    cross_encoder = gr.Radio(
        choices=['(FAST) MiniLM-L6v2', '(ACCURATE) BGE reranker'],
        value='(ACCURATE) BGE reranker',
        label="Embeddings Model",
        info="Select the model for document ranking"
    )
    language_dropdown = gr.Dropdown(
        choices=[
            "Hindi", "Gom", "Kannada", "Dogri", "Bodo", "Urdu", "Tamil", "Kashmiri", "Assamese", "Bengali", "Marathi",
            "Sindhi", "Maithili", "Punjabi", "Malayalam", "Manipuri", "Telugu", "Sanskrit", "Nepali", "Santali",
            "Gujarati", "Odia"
        ],
        value="Hindi",
        label="Select Language for Translation"
    )
    translated_textbox = gr.Textbox(label="Translated Response")

    # Event handlers
    def update_chat_and_translate(message, history, cross_encoder_choice, selected_language):
        """Answer ``message``, append it to the chat and translate the answer.

        Returns a ``(textbox_value, history, translated_text)`` tuple matching
        the ``[msg, chatbot, translated_textbox]`` outputs. A translation
        problem never discards the generated answer: the chat is still
        updated and the translation box shows 'Translation failed.'.
        """
        history = [] if history is None else history
        if not message or not message.strip():
            return "", history, ""

        # Generate response
        response = retrieve_and_generate_response(message, cross_encoder_choice, history)
        history.append([message, response])

        # Translate response
        try:
            translated_text = translate_text(selected_language, history)
        except Exception:
            # UI boundary: keep the chat answer even if translation blows up.
            logger.exception("Translation failed")
            translated_text = None
        if translated_text is None:
            translated_text = "Translation failed."
        return "", history, translated_text

    # Pressing Enter in the textbox and clicking the button do the same thing.
    chat_inputs = [msg, chatbot, cross_encoder, language_dropdown]
    chat_outputs = [msg, chatbot, translated_textbox]
    msg.submit(update_chat_and_translate, chat_inputs, chat_outputs)
    submit_btn.click(update_chat_and_translate, chat_inputs, chat_outputs)

    clear = gr.Button("Clear Conversation")
    clear.click(lambda: ([], "", ""), outputs=[chatbot, msg, translated_textbox])

    # Example questions
    gr.Examples(
        examples=[
            'What is the difference between metals and non-metals?',
            'What is an ionic bond?',
            'Explain asexual reproduction',
            'What is photosynthesis?',
            'Explain Newton\'s laws of motion'
        ],
        inputs=msg,
        label="Try these example questions:"
    )
if __name__ == "__main__":
    # Start the app only when this file is run as a script: all network
    # interfaces, port 7860.
    launch_options = {"server_name": "0.0.0.0", "server_port": 7860}
    demo.launch(**launch_options)
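# ---------------------------------------------------------------------------
# Everything below is commented-out code: an alternative implementation of
# this app that is not executed.
# ---------------------------------------------------------------------------
# import gradio as gr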
# from ragatouille import RAGPretrainedModel
# import logging
# from pathlib import Path
# from time import perf_counter
# from sentence_transformers import CrossEncoder
# from huggingface_hub import InferenceClient
# from jinja2 import Environment, FileSystemLoader
# import numpy as np
# from os import getenv
# from backend.query_llm import generate_hf, generate_qwen
# from backend.semantic_search import table, retriever
# from huggingface_hub import InferenceClient
# # Bhashini API translation function
# api_key = getenv('API_KEY')
# user_id = getenv('USER_ID')
# def bhashini_translate(text: str, from_code: str = "en", to_code: str = "hi") -> dict:
# """Translates text from source language to target language using the Bhashini API."""
# if not text.strip():
# print('Input text is empty. Please provide valid text for translation.')
# return {"status_code": 400, "message": "Input text is empty", "translated_content": None, "speech_content": None}
# else:
# print('Input text - ',text)
# print(f'Starting translation process from {from_code} to {to_code}...')
# print(f'Starting translation process from {from_code} to {to_code}...')
# gr.Warning(f'Translating to {to_code}...')
# url = 'https://meity-auth.ulcacontrib.org/ulca/apis/v0/model/getModelsPipeline'
# headers = {
# "Content-Type": "application/json",
# "userID": user_id,
# "ulcaApiKey": api_key
# }
# payload = {
# "pipelineTasks": [{"taskType": "translation", "config": {"language": {"sourceLanguage": from_code, "targetLanguage": to_code}}}],
# "pipelineRequestConfig": {"pipelineId": "64392f96daac500b55c543cd"}
# }
# print('Sending initial request to get the pipeline...')
# response = requests.post(url, json=payload, headers=headers)
# if response.status_code != 200:
# print(f'Error in initial request: {response.status_code}')
# return {"status_code": response.status_code, "message": "Error in translation request", "translated_content": None}
# print('Initial request successful, processing response...')
# response_data = response.json()
# service_id = response_data["pipelineResponseConfig"][0]["config"][0]["serviceId"]
# callback_url = response_data["pipelineInferenceAPIEndPoint"]["callbackUrl"]
# print(f'Service ID: {service_id}, Callback URL: {callback_url}')
# headers2 = {
# "Content-Type": "application/json",
# response_data["pipelineInferenceAPIEndPoint"]["inferenceApiKey"]["name"]: response_data["pipelineInferenceAPIEndPoint"]["inferenceApiKey"]["value"]
# }
# compute_payload = {
# "pipelineTasks": [{"taskType": "translation", "config": {"language": {"sourceLanguage": from_code, "targetLanguage": to_code}, "serviceId": service_id}}],
# "inputData": {"input": [{"source": text}], "audio": [{"audioContent": None}]}
# }
# print(f'Sending translation request with text: "{text}"')
# compute_response = requests.post(callback_url, json=compute_payload, headers=headers2)
# if compute_response.status_code != 200:
# print(f'Error in translation request: {compute_response.status_code}')
# return {"status_code": compute_response.status_code, "message": "Error in translation", "translated_content": None}
# print('Translation request successful, processing translation...')
# compute_response_data = compute_response.json()
# translated_content = compute_response_data["pipelineResponse"][0]["output"][0]["target"]
# print(f'Translation successful. Translated content: "{translated_content}"')
# return {"status_code": 200, "message": "Translation successful", "translated_content": translated_content}
# # Existing chatbot functions
# VECTOR_COLUMN_NAME = "vector"
# TEXT_COLUMN_NAME = "text"
# HF_TOKEN = getenv("HUGGING_FACE_HUB_TOKEN")
# proj_dir = Path(__file__).parent
# logging.basicConfig(level=logging.INFO)
# logger = logging.getLogger(__name__)
# client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1", token=HF_TOKEN)
# env = Environment(loader=FileSystemLoader(proj_dir / 'templates'))
# template = env.get_template('template.j2')
# template_html = env.get_template('template_html.j2')
# # def add_text(history, text):
# # history = [] if history is None else history
# # history = history + [(text, None)]
# # return history, gr.Textbox(value="", interactive=False)
# def bot(history, cross_encoder):
# top_rerank = 25
# top_k_rank = 20
# query = history[-1][0] if history else ''
# print('\nQuery: ',query )
# print('\nHistory:',history)
# if not query:
# gr.Warning("Please submit a non-empty string as a prompt")
# raise ValueError("Empty string was submitted")
# logger.warning('Retrieving documents...')
# if cross_encoder == '(HIGH ACCURATE) ColBERT':
# gr.Warning('Retrieving using ColBERT.. First time query will take a minute for model to load..pls wait')
# RAG = RAGPretrainedModel.from_pretrained("colbert-ir/colbertv2.0")
# RAG_db = RAG.from_index('.ragatouille/colbert/indexes/cbseclass10index')
# documents_full = RAG_db.search(query, k=top_k_rank)
# documents = [item['content'] for item in documents_full]
# prompt = template.render(documents=documents, query=query)
# prompt_html = template_html.render(documents=documents, query=query)
# generate_fn = generate_hf
# history[-1][1] = ""
# for character in generate_fn(prompt, history[:-1]):
# history[-1][1] = character
# yield history, prompt_html
# else:
# document_start = perf_counter()
# query_vec = retriever.encode(query)
# doc1 = table.search(query_vec, vector_column_name=VECTOR_COLUMN_NAME).limit(top_k_rank)
# documents = table.search(query_vec, vector_column_name=VECTOR_COLUMN_NAME).limit(top_rerank).to_list()
# documents = [doc[TEXT_COLUMN_NAME] for doc in documents]
# query_doc_pair = [[query, doc] for doc in documents]
# if cross_encoder == '(FAST) MiniLM-L6v2':
# cross_encoder1 = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2')
# elif cross_encoder == '(ACCURATE) BGE reranker':
# cross_encoder1 = CrossEncoder('BAAI/bge-reranker-base')
# cross_scores = cross_encoder1.predict(query_doc_pair)
# sim_scores_argsort = list(reversed(np.argsort(cross_scores)))
# documents = [documents[idx] for idx in sim_scores_argsort[:top_k_rank]]
# document_time = perf_counter() - document_start
# prompt = template.render(documents=documents, query=query)
# prompt_html = template_html.render(documents=documents, query=query)
# #generate_fn = generate_hf
# generate_fn=generate_qwen
# # Create a new history entry instead of modifying the tuple directly
# new_history = history[:-1] + [ (prompt, "") ] # query replaced prompt
# output=''
# # for character in generate_fn(prompt, history[:-1]):
# # #new_history[-1] = (query, character)
# # output+=character
# output=generate_fn(prompt, history[:-1])
# print('Output:',output)
# new_history[-1] = (prompt, output) #query replaced with prompt
# print('New History',new_history)
# #print('prompt html',prompt_html)# Update the last tuple with new text
# history_list = list(history[-1])
# history_list[1] = output # Assuming `character` is what you want to assign
# # Update the history with the modified list converted back to a tuple
# history[-1] = tuple(history_list)
# #history[-1][1] = character
# # yield new_history, prompt_html
# yield history, prompt_html
# # new_history,prompt_html
# # history[-1][1] = ""
# # for character in generate_fn(prompt, history[:-1]):
# # history[-1][1] = character
# # yield history, prompt_html
# #def translate_text(response_text, selected_language):
# def translate_text(selected_language,history):
# iso_language_codes = {
# "Hindi": "hi",
# "Gom": "gom",
# "Kannada": "kn",
# "Dogri": "doi",
# "Bodo": "brx",
# "Urdu": "ur",
# "Tamil": "ta",
# "Kashmiri": "ks",
# "Assamese": "as",
# "Bengali": "bn",
# "Marathi": "mr",
# "Sindhi": "sd",
# "Maithili": "mai",
# "Punjabi": "pa",
# "Malayalam": "ml",
# "Manipuri": "mni",
# "Telugu": "te",
# "Sanskrit": "sa",
# "Nepali": "ne",
# "Santali": "sat",
# "Gujarati": "gu",
# "Odia": "or"
# }
# to_code = iso_language_codes[selected_language]
# response_text = history[-1][1] if history else ''
# print('response_text for translation',response_text)
# translation = bhashini_translate(response_text, to_code=to_code)
# return translation['translated_content']
# # Gradio interface
# with gr.Blocks(theme='gradio/soft') as CHATBOT:
# history_state = gr.State([])
# with gr.Row():
# with gr.Column(scale=10):
# gr.HTML(value="""<div style="color: #FF4500;"><h1>Welcome! I am your friend!</h1>Ask me !I will help you<h1><span style="color: #008000">I AM A CHATBOT FOR 9 SCIENCE WITH TRANSLATION IN 22 LANGUAGES</span></h1></div>""")
# gr.HTML(value=f"""<p style="font-family: sans-serif; font-size: 16px;">A free chat bot developed by K.M.RAMYASRI,TGT,GHS.SUTHUKENY using Open source LLMs for 10 std students</p>""")
# gr.HTML(value=f"""<p style="font-family: Arial, sans-serif; font-size: 14px;"> Suggestions may be sent to <a href="mailto:[email protected]" style="color: #00008B; font-style: italic;">[email protected]</a>.</p>""")
# with gr.Column(scale=3):
# gr.Image(value='logo.png', height=200, width=200)
# chatbot = gr.Chatbot(
# [],
# elem_id="chatbot",
# avatar_images=('https://aui.atlassian.com/aui/8.8/docs/images/avatar-person.svg',
# 'https://huggingface.co/datasets/huggingface/brand-assets/resolve/main/hf-logo.svg'),
# bubble_full_width=False,
# show_copy_button=True,
# show_share_button=True,
# )
# with gr.Row():
# txt = gr.Textbox(
# scale=3,
# show_label=False,
# placeholder="Enter text and press enter",
# container=False,
# )
# txt_btn = gr.Button(value="Submit text", scale=1)
# cross_encoder = gr.Radio(choices=['(FAST) MiniLM-L6v2', '(ACCURATE) BGE reranker', '(HIGH ACCURATE) ColBERT'], value='(ACCURATE) BGE reranker', label="Embeddings", info="Only First query to Colbert may take little time)")
# language_dropdown = gr.Dropdown(
# choices=[
# "Hindi", "Gom", "Kannada", "Dogri", "Bodo", "Urdu", "Tamil", "Kashmiri", "Assamese", "Bengali", "Marathi",
# "Sindhi", "Maithili", "Punjabi", "Malayalam", "Manipuri", "Telugu", "Sanskrit", "Nepali", "Santali",
# "Gujarati", "Odia"
# ],
# value="Hindi", # default to Hindi
# label="Select Language for Translation"
# )
# prompt_html = gr.HTML()
# translated_textbox = gr.Textbox(label="Translated Response")
# def update_history_and_translate(txt, cross_encoder, history_state, language_dropdown):
# print('History state',history_state)
# history = history_state
# history.append((txt, ""))
# #history_state.value=(history)
# # Call bot function
# # bot_output = list(bot(history, cross_encoder))
# bot_output = next(bot(history, cross_encoder))
# print('bot_output',bot_output)
# #history, prompt_html = bot_output[-1]
# history, prompt_html = bot_output
# print('History',history)
# # Update the history state
# history_state[:] = history
# # Translate text
# translated_text = translate_text(language_dropdown, history)
# return history, prompt_html, translated_text
# txt_msg = txt_btn.click(update_history_and_translate, [txt, cross_encoder, history_state, language_dropdown], [chatbot, prompt_html, translated_textbox])
# txt_msg = txt.submit(update_history_and_translate, [txt, cross_encoder, history_state, language_dropdown], [chatbot, prompt_html, translated_textbox])
# examples = ['WHAT IS DIFFERENCES BETWEEN HOMOGENOUS AND HETEROGENOUS MIXTURE?','WHAT IS COVALENT BOND?',
# 'EXPLAIN GOLGI APPARATUS']
# gr.Examples(examples, txt)
# # Launch the Gradio application
# CHATBOT.launch(share=True,debug=True)