"""Gradio chatbot for 10th-grade science students.

Retrieval-augmented answers come from a LanceDB semantic index re-ranked with a
sentence-transformers cross-encoder, generation is done by a Groq-hosted Llama 3
model via phidata, and responses can be translated to Indian languages through
the Bhashini (ULCA) API.
"""

import logging
import os
from time import perf_counter

import gradio as gr
import numpy as np
import requests
from phi.agent import Agent
from phi.model.groq import Groq
from sentence_transformers import CrossEncoder

from backend.semantic_search import retriever, table

# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# --- API key setup ----------------------------------------------------------
api_key = os.getenv("GROQ_API_KEY")
if not api_key:
    gr.Warning("GROQ_API_KEY not found. Set it in 'Repository secrets'.")
    logger.error("GROQ_API_KEY not found.")
    api_key = ""  # Fallback to empty string; generation will fail without a real key.
else:
    os.environ["GROQ_API_KEY"] = api_key

# Bhashini (ULCA) credentials used for translation to Indian languages.
bhashini_api_key = os.getenv("API_KEY")
bhashini_user_id = os.getenv("USER_ID")

# ISO 639 codes accepted by Bhashini for the languages offered in the UI.
ISO_LANGUAGE_CODES = {
    "Hindi": "hi", "Gom": "gom", "Kannada": "kn", "Dogri": "doi", "Bodo": "brx",
    "Urdu": "ur", "Tamil": "ta", "Kashmiri": "ks", "Assamese": "as",
    "Bengali": "bn", "Marathi": "mr", "Sindhi": "sd", "Maithili": "mai",
    "Punjabi": "pa", "Malayalam": "ml", "Manipuri": "mni", "Telugu": "te",
    "Sanskrit": "sa", "Nepali": "ne", "Santali": "sat", "Gujarati": "gu",
    "Odia": "or",
}


def bhashini_translate(text: str, from_code: str = "en", to_code: str = "hi") -> dict:
    """Translate *text* from *from_code* to *to_code* using the Bhashini API.

    Two-step protocol: first ask ULCA for the pipeline configuration (service id,
    callback URL, inference key), then call the returned inference endpoint.

    Returns a dict with keys ``status_code``, ``message`` and
    ``translated_content`` (``None`` whenever translation failed).
    """
    if not text.strip():
        logger.warning("Input text is empty. Please provide valid text for translation.")
        return {"status_code": 400, "message": "Input text is empty", "translated_content": None}

    logger.info("Starting translation process from %s to %s...", from_code, to_code)
    gr.Warning(f"Translating to {to_code}...")

    # Step 1: fetch the translation pipeline configuration from ULCA.
    url = "https://meity-auth.ulcacontrib.org/ulca/apis/v0/model/getModelsPipeline"
    headers = {
        "Content-Type": "application/json",
        "userID": bhashini_user_id,
        "ulcaApiKey": bhashini_api_key,
    }
    payload = {
        "pipelineTasks": [{
            "taskType": "translation",
            "config": {"language": {"sourceLanguage": from_code, "targetLanguage": to_code}},
        }],
        "pipelineRequestConfig": {"pipelineId": "64392f96daac500b55c543cd"},
    }

    try:
        # timeout prevents a hung endpoint from blocking the whole app.
        response = requests.post(url, json=payload, headers=headers, timeout=60)
    except requests.RequestException as exc:
        logger.error("Pipeline request failed: %s", exc)
        return {"status_code": 500, "message": "Error in translation request", "translated_content": None}
    if response.status_code != 200:
        logger.error("Error in initial request: %s, Response: %s", response.status_code, response.text)
        return {"status_code": response.status_code, "message": "Error in translation request", "translated_content": None}

    response_data = response.json()
    endpoint = response_data.get("pipelineInferenceAPIEndPoint")
    if not endpoint or "callbackUrl" not in endpoint:
        logger.error("Unexpected response structure: %s", response_data)
        return {"status_code": 400, "message": "Unexpected API response structure", "translated_content": None}

    service_id = response_data["pipelineResponseConfig"][0]["config"][0]["serviceId"]
    callback_url = endpoint["callbackUrl"]
    logger.info("Service ID: %s, Callback URL: %s", service_id, callback_url)

    # Step 2: call the inference endpoint using the key returned in step 1.
    # The header *name* itself is dynamic and comes from the pipeline response.
    headers2 = {
        "Content-Type": "application/json",
        endpoint["inferenceApiKey"]["name"]: endpoint["inferenceApiKey"]["value"],
    }
    compute_payload = {
        "pipelineTasks": [{
            "taskType": "translation",
            "config": {
                "language": {"sourceLanguage": from_code, "targetLanguage": to_code},
                "serviceId": service_id,
            },
        }],
        "inputData": {"input": [{"source": text}], "audio": [{"audioContent": None}]},
    }

    try:
        compute_response = requests.post(callback_url, json=compute_payload, headers=headers2, timeout=60)
    except requests.RequestException as exc:
        logger.error("Translation request failed: %s", exc)
        return {"status_code": 500, "message": "Error in translation", "translated_content": None}
    if compute_response.status_code != 200:
        logger.error(
            "Error in translation request: %s, Response: %s",
            compute_response.status_code, compute_response.text,
        )
        return {"status_code": compute_response.status_code, "message": "Error in translation", "translated_content": None}

    compute_response_data = compute_response.json()
    translated_content = compute_response_data["pipelineResponse"][0]["output"][0]["target"]
    logger.info('Translation successful. Translated content: "%s"', translated_content)
    return {"status_code": 200, "message": "Translation successful", "translated_content": translated_content}


# PhiData agent that produces the final answers.
agent = Agent(
    name="Science Education Assistant",
    role="You are a helpful science tutor for 10th-grade students",
    instructions=[
        "You are an expert science teacher specializing in 10th-grade curriculum.",
        "Provide clear, accurate, and age-appropriate explanations.",
        "Use simple language and examples that students can understand.",
        "Focus on concepts from physics, chemistry, and biology.",
        "Structure responses with headings and bullet points when helpful.",
        "Encourage learning and curiosity.",
    ],
    model=Groq(id="llama3-70b-8192", api_key=api_key),
    markdown=True,
)


def retrieve_and_generate_response(query, cross_encoder_choice, history=None):
    """Generate a response using semantic search plus the LLM agent.

    Retrieves up to 25 candidate chunks from the vector index, re-ranks them
    with the selected cross-encoder, and feeds the top documents (plus the
    last two conversation turns) to the agent as context.
    """
    top_rerank = 25   # candidates pulled from the vector index
    top_k_rank = 20   # candidates kept after cross-encoder re-ranking

    if not query.strip():
        return "Please provide a valid question."

    try:
        start_time = perf_counter()

        # Encode query and search documents
        query_vec = retriever.encode(query)
        documents = table.search(query_vec, vector_column_name="vector").limit(top_rerank).to_list()
        documents = [doc["text"] for doc in documents]

        # Re-rank documents using the chosen cross-encoder.
        cross_encoder_model = (
            CrossEncoder('BAAI/bge-reranker-base')
            if cross_encoder_choice == '(ACCURATE) BGE reranker'
            else CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2')
        )
        query_doc_pair = [[query, doc] for doc in documents]
        cross_scores = cross_encoder_model.predict(query_doc_pair)
        sim_scores_argsort = list(reversed(np.argsort(cross_scores)))
        documents = [documents[idx] for idx in sim_scores_argsort[:top_k_rank]]

        # Create context from the top documents.
        context = "\n\n".join(documents[:10]) if documents else ""
        context = f"Context information from educational materials:\n{context}\n\n"

        # Add the last two conversation exchanges for continuity.
        history_context = ""
        if history and len(history) > 0:
            for user_msg, bot_msg in history[-2:]:
                if user_msg and bot_msg:
                    history_context += f"Previous Q: {user_msg}\nPrevious A: {bot_msg}\n"

        full_prompt = (
            f"{history_context}{context}Question: {query}\n\n"
            "Please answer the question using the context provided above. "
            "If the context doesn't contain relevant information, use your general "
            "knowledge about 10th-grade science topics."
        )

        response = agent.run(full_prompt)
        # agent.run may return a RunResponse object or a plain string.
        response_text = response.content if hasattr(response, 'content') else str(response)

        logger.info(f"Response generation took {perf_counter() - start_time:.2f} seconds")
        return response_text

    except Exception as e:
        logger.error(f"Error in response generation: {e}")
        return f"Error generating response: {str(e)}"


def simple_chat_function(message, history, cross_encoder_choice):
    """Chat handler: run retrieval + generation and append the turn to history."""
    if not message.strip():
        return "", history

    response = retrieve_and_generate_response(message, cross_encoder_choice, history)
    history.append([message, response])
    # First output clears the textbox; second is the updated chatbot history.
    return "", history


def translate_text(selected_language, history):
    """Translate the last bot response in *history* to *selected_language*."""
    to_code = ISO_LANGUAGE_CODES[selected_language]
    response_text = history[-1][1] if history and history[-1][1] else ''
    translation = bhashini_translate(response_text, to_code=to_code)
    # translated_content is explicitly None on failure, so `or` (not a .get
    # default, which never fires for a present key) supplies the fallback text.
    return translation.get('translated_content') or 'Translation failed.'


# --- Gradio interface -------------------------------------------------------
with gr.Blocks(title="Science Chatbot", theme='gradio/soft') as demo:
    # Header section
    with gr.Row():
        with gr.Column(scale=10):
            gr.HTML(value="""A free chat bot developed by K.M.RAMYASRI,TGT,GHS.SUTHUKENY using Open source LLMs for 10 std students""")
            gr.HTML(value="""Suggestions may be sent to ramyadevi1607@yahoo.com.""")
        with gr.Column(scale=3):
            try:
                gr.Image(value='logo.png', height=200, width=200)
            except Exception:
                # Logo is optional; the app must still come up without it.
                logger.warning("logo.png could not be displayed; continuing without logo.")

    chatbot = gr.Chatbot([], elem_id="chatbot", show_copy_button=True)
    with gr.Row():
        txt = gr.Textbox(
            scale=3,
            show_label=False,
            placeholder="Enter text and press enter",
            container=False,
        )
        txt_btn = gr.Button(value="Submit text", scale=1)

    cross_encoder = gr.Radio(
        choices=['(FAST) MiniLM-L6v2', '(ACCURATE) BGE reranker'],
        value='(ACCURATE) BGE reranker',
        label="Embeddings",
    )
    language_dropdown = gr.Dropdown(
        choices=list(ISO_LANGUAGE_CODES),
        value="Hindi",
        label="Select Language for Translation",
    )
    translated_textbox = gr.Textbox(label="Translated Response")

    def _respond_and_translate(message, history, encoder_choice, language):
        """Run one chat turn, then translate the new answer for the side panel."""
        cleared, new_history = simple_chat_function(message, history, encoder_choice)
        translated = translate_text(language, new_history) if new_history else ""
        return cleared, new_history, translated

    txt_btn.click(
        _respond_and_translate,
        [txt, chatbot, cross_encoder, language_dropdown],
        [txt, chatbot, translated_textbox],
    )
    txt.submit(
        _respond_and_translate,
        [txt, chatbot, cross_encoder, language_dropdown],
        [txt, chatbot, translated_textbox],
    )

    gr.Examples(
        ['WHAT IS DIFFERENCES BETWEEN HOMOGENOUS AND HETEROGENOUS MIXTURE?',
         'WHAT IS COVALENT BOND?',
         'EXPLAIN GOLGI APPARATUS'],
        txt,
    )


if __name__ == "__main__":
    demo.launch(debug=True)