NCTCMumbai committed on
Commit
8ed665f
·
verified ·
1 Parent(s): 895f730

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +430 -159
app.py CHANGED
@@ -10,6 +10,7 @@ from os import getenv
10
  import requests
11
  from jinja2 import Environment, FileSystemLoader
12
  from backend.semantic_search import table, retriever
 
13
  # Bhashini API translation function
14
  api_key = getenv('API_KEY', '').strip()
15
  user_id = getenv('USER_ID', '').strip()
@@ -107,7 +108,7 @@ agent = Agent(
107
  "Use simple language and examples relevant to customs officers.",
108
  "Focus on topics like transhipment, AEO schemes, bonds, penalties, and CFS approvals.",
109
  "Structure responses with headings and bullet points when helpful.",
110
- "If you dont know the answer, say 'I dont have enough information to answer that.'"
111
  ],
112
  model=Groq(id="llama3-70b-8192", api_key=api_key),
113
  markdown=True
@@ -196,7 +197,6 @@ def translate_text(selected_language, history):
196
 
197
  # Gradio interface
198
  with gr.Blocks(theme='gradio/soft') as CHATBOT:
199
- history_state = gr.State([])
200
  with gr.Row():
201
  with gr.Column(scale=10):
202
  gr.HTML(value="""<div style="color: #FF4500;"><h1>ADWITIYA-</h1> <h1><span style="color: #008000">Custom Manual Chatbot </span></h1></div>""")
@@ -239,20 +239,22 @@ with gr.Blocks(theme='gradio/soft') as CHATBOT:
239
  prompt_html = gr.HTML()
240
  translated_textbox = gr.Textbox(label="Translated Response")
241
 
242
- def update_chat_and_translate(txt, cross_encoder, history_state, language_dropdown):
243
- history = history_state.value if history_state.value else []
244
- history.append((txt, ""))
 
245
 
246
  # Call simple_chat_function
247
- msg, history, prompt_html_content = simple_chat_function(txt, history, cross_encoder)
248
 
249
  # Translate text
250
- translated_text = translate_text(language_dropdown, history)
251
 
252
- return history, prompt_html_content, translated_text
253
 
254
- txt_msg = txt_btn.click(update_chat_and_translate, [txt, cross_encoder, history_state, language_dropdown], [chatbot, prompt_html, translated_textbox])
255
- txt_msg = txt.submit(update_chat_and_translate, [txt, cross_encoder, history_state, language_dropdown], [chatbot, prompt_html, translated_textbox])
 
256
 
257
  examples = [
258
  'My transhipment cargo is missing',
@@ -267,35 +269,29 @@ with gr.Blocks(theme='gradio/soft') as CHATBOT:
267
  gr.Examples(examples, txt)
268
 
269
  # Launch the Gradio application
270
- CHATBOT.launch(share=True, debug=True)# import requests
271
- # import gradio as gr
272
- # from ragatouille import RAGPretrainedModel
273
  # import logging
274
  # from pathlib import Path
275
  # from time import perf_counter
276
  # from sentence_transformers import CrossEncoder
277
- # from huggingface_hub import InferenceClient
278
- # from jinja2 import Environment, FileSystemLoader
279
  # import numpy as np
280
  # from os import getenv
281
- # from backend.query_llm import generate_hf, generate_qwen
 
282
  # from backend.semantic_search import table, retriever
283
- # from huggingface_hub import InferenceClient
284
-
285
-
286
  # # Bhashini API translation function
287
- # api_key = getenv('API_KEY')
288
- # user_id = getenv('USER_ID')
289
 
290
  # def bhashini_translate(text: str, from_code: str = "en", to_code: str = "hi") -> dict:
291
  # """Translates text from source language to target language using the Bhashini API."""
292
-
293
  # if not text.strip():
294
  # print('Input text is empty. Please provide valid text for translation.')
295
  # return {"status_code": 400, "message": "Input text is empty", "translated_content": None, "speech_content": None}
296
  # else:
297
- # print('Input text - ',text)
298
- # print(f'Starting translation process from {from_code} to {to_code}...')
299
  # print(f'Starting translation process from {from_code} to {to_code}...')
300
  # gr.Warning(f'Translating to {to_code}...')
301
 
@@ -305,6 +301,11 @@ CHATBOT.launch(share=True, debug=True)# import requests
305
  # "userID": user_id,
306
  # "ulcaApiKey": api_key
307
  # }
 
 
 
 
 
308
  # payload = {
309
  # "pipelineTasks": [{"taskType": "translation", "config": {"language": {"sourceLanguage": from_code, "targetLanguage": to_code}}}],
310
  # "pipelineRequestConfig": {"pipelineId": "64392f96daac500b55c543cd"}
@@ -347,7 +348,6 @@ CHATBOT.launch(share=True, debug=True)# import requests
347
  # print(f'Translation successful. Translated content: "{translated_content}"')
348
  # return {"status_code": 200, "message": "Translation successful", "translated_content": translated_content}
349
 
350
-
351
  # # Existing chatbot functions
352
  # VECTOR_COLUMN_NAME = "vector"
353
  # TEXT_COLUMN_NAME = "text"
@@ -356,136 +356,114 @@ CHATBOT.launch(share=True, debug=True)# import requests
356
 
357
  # logging.basicConfig(level=logging.INFO)
358
  # logger = logging.getLogger(__name__)
359
- # client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1", token=HF_TOKEN)
360
- # proj_dir = Path(__file__).parent
361
- # env = Environment(loader=FileSystemLoader(proj_dir / 'templates'))
362
 
 
 
363
  # template = env.get_template('template.j2')
364
  # template_html = env.get_template('template_html.j2')
365
 
366
- # # def add_text(history, text):
367
- # # history = [] if history is None else history
368
- # # history = history + [(text, None)]
369
- # # return history, gr.Textbox(value="", interactive=False)
370
-
371
- # def bot(history, cross_encoder):
372
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
373
  # top_rerank = 25
374
  # top_k_rank = 20
375
- # query = history[-1][0] if history else ''
376
- # print('\nQuery: ',query )
377
- # print('\nHistory:',history)
378
- # if not query:
379
- # gr.Warning("Please submit a non-empty string as a prompt")
380
- # raise ValueError("Empty string was submitted")
381
-
382
- # logger.warning('Retrieving documents...')
383
-
384
- # if cross_encoder == '(HIGH ACCURATE) ColBERT':
385
- # gr.Warning('Retrieving using ColBERT.. First time query will take a minute for model to load..pls wait')
386
- # RAG = RAGPretrainedModel.from_pretrained("colbert-ir/colbertv2.0")
387
- # RAG_db = RAG.from_index('.ragatouille/colbert/indexes/cbseclass10index')
388
- # documents_full = RAG_db.search(query, k=top_k_rank)
389
-
390
- # documents = [item['content'] for item in documents_full]
391
- # prompt = template.render(documents=documents, query=query)
392
- # prompt_html = template_html.render(documents=documents, query=query)
393
-
394
- # generate_fn = generate_hf
395
-
396
- # history[-1][1] = ""
397
- # for character in generate_fn(prompt, history[:-1]):
398
- # history[-1][1] = character
399
- # yield history, prompt_html
400
- # else:
401
- # document_start = perf_counter()
402
-
403
- # query_vec = retriever.encode(query)
404
- # doc1 = table.search(query_vec, vector_column_name=VECTOR_COLUMN_NAME).limit(top_k_rank)
405
 
 
 
 
 
 
406
  # documents = table.search(query_vec, vector_column_name=VECTOR_COLUMN_NAME).limit(top_rerank).to_list()
407
  # documents = [doc[TEXT_COLUMN_NAME] for doc in documents]
408
-
409
- # query_doc_pair = [[query, doc] for doc in documents]
410
- # if cross_encoder == '(FAST) MiniLM-L6v2':
411
- # cross_encoder1 = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2')
412
- # elif cross_encoder == '(ACCURATE) BGE reranker':
413
- # cross_encoder1 = CrossEncoder('BAAI/bge-reranker-base')
414
 
415
- # cross_scores = cross_encoder1.predict(query_doc_pair)
416
- # sim_scores_argsort = list(reversed(np.argsort(cross_scores)))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
417
 
418
- # documents = [documents[idx] for idx in sim_scores_argsort[:top_k_rank]]
419
-
420
- # document_time = perf_counter() - document_start
421
-
422
- # prompt = template.render(documents=documents, query=query)
423
- # prompt_html = template_html.render(documents=documents, query=query)
424
-
425
- # #generate_fn = generate_hf
426
- # generate_fn=generate_qwen
427
- # # Create a new history entry instead of modifying the tuple directly
428
- # new_history = history[:-1] + [ (prompt, "") ] # query replaced prompt
429
- # output=''
430
- # # for character in generate_fn(prompt, history[:-1]):
431
- # # #new_history[-1] = (query, character)
432
- # # output+=character
433
- # output=generate_fn(prompt, history[:-1])
434
 
435
- # print('Output:',output)
436
- # new_history[-1] = (prompt, output) #query replaced with prompt
437
- # print('New History',new_history)
438
- # #print('prompt html',prompt_html)# Update the last tuple with new text
 
 
439
 
440
- # history_list = list(history[-1])
441
- # history_list[1] = output # Assuming `character` is what you want to assign
442
- # # Update the history with the modified list converted back to a tuple
443
- # history[-1] = tuple(history_list)
444
-
445
- # #history[-1][1] = character
446
- # # yield new_history, prompt_html
447
- # yield history, prompt_html
448
- # # new_history,prompt_html
449
- # # history[-1][1] = ""
450
- # # for character in generate_fn(prompt, history[:-1]):
451
- # # history[-1][1] = character
452
- # # yield history, prompt_html
453
-
454
- # #def translate_text(response_text, selected_language):
455
-
456
- # def translate_text(selected_language,history):
457
 
 
 
 
 
 
 
458
  # iso_language_codes = {
459
- # "Hindi": "hi",
460
- # "Gom": "gom",
461
- # "Kannada": "kn",
462
- # "Dogri": "doi",
463
- # "Bodo": "brx",
464
- # "Urdu": "ur",
465
- # "Tamil": "ta",
466
- # "Kashmiri": "ks",
467
- # "Assamese": "as",
468
- # "Bengali": "bn",
469
- # "Marathi": "mr",
470
- # "Sindhi": "sd",
471
- # "Maithili": "mai",
472
- # "Punjabi": "pa",
473
- # "Malayalam": "ml",
474
- # "Manipuri": "mni",
475
- # "Telugu": "te",
476
- # "Sanskrit": "sa",
477
- # "Nepali": "ne",
478
- # "Santali": "sat",
479
- # "Gujarati": "gu",
480
- # "Odia": "or"
481
  # }
482
 
483
  # to_code = iso_language_codes[selected_language]
484
- # response_text = history[-1][1] if history else ''
485
- # print('response_text for translation',response_text)
486
  # translation = bhashini_translate(response_text, to_code=to_code)
487
- # return translation['translated_content']
488
-
489
 
490
  # # Gradio interface
491
  # with gr.Blocks(theme='gradio/soft') as CHATBOT:
@@ -525,43 +503,336 @@ CHATBOT.launch(share=True, debug=True)# import requests
525
  # "Sindhi", "Maithili", "Punjabi", "Malayalam", "Manipuri", "Telugu", "Sanskrit", "Nepali", "Santali",
526
  # "Gujarati", "Odia"
527
  # ],
528
- # value="Hindi", # default to Hindi
529
  # label="Select Language for Translation"
530
  # )
531
 
532
  # prompt_html = gr.HTML()
533
-
534
  # translated_textbox = gr.Textbox(label="Translated Response")
535
- # def update_history_and_translate(txt, cross_encoder, history_state, language_dropdown):
536
- # print('History state',history_state)
537
- # history = history_state
538
  # history.append((txt, ""))
539
- # #history_state.value=(history)
540
 
541
- # # Call bot function
542
- # # bot_output = list(bot(history, cross_encoder))
543
- # bot_output = next(bot(history, cross_encoder))
544
- # print('bot_output',bot_output)
545
- # #history, prompt_html = bot_output[-1]
546
- # history, prompt_html = bot_output
547
- # print('History',history)
548
- # # Update the history state
549
- # history_state[:] = history
550
 
551
  # # Translate text
552
  # translated_text = translate_text(language_dropdown, history)
553
- # return history, prompt_html, translated_text
 
554
 
555
- # txt_msg = txt_btn.click(update_history_and_translate, [txt, cross_encoder, history_state, language_dropdown], [chatbot, prompt_html, translated_textbox])
556
- # txt_msg = txt.submit(update_history_and_translate, [txt, cross_encoder, history_state, language_dropdown], [chatbot, prompt_html, translated_textbox])
557
 
558
- # examples = ['My transhipment cargo is missing','can u explain and tabulate difference between b 17 bond and a warehousing bond',
559
- # 'What are benefits of the AEO Scheme and eligibility criteria?',
560
- # 'What are penalties for customs offences? ', 'what are penalties to customs officers misusing their powers under customs act?','What are eligibility criteria for exemption from cost recovery charges','list in detail what is procedure for obtaining new approval for openeing a CFS attached to an ICD']
 
 
 
 
 
 
561
 
562
  # gr.Examples(examples, txt)
563
 
564
-
565
  # # Launch the Gradio application
566
- # CHATBOT.launch(share=True,debug=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
567
 
 
10
  import requests
11
  from jinja2 import Environment, FileSystemLoader
12
  from backend.semantic_search import table, retriever
13
+
14
  # Bhashini API translation function
15
  api_key = getenv('API_KEY', '').strip()
16
  user_id = getenv('USER_ID', '').strip()
 
108
  "Use simple language and examples relevant to customs officers.",
109
  "Focus on topics like transhipment, AEO schemes, bonds, penalties, and CFS approvals.",
110
  "Structure responses with headings and bullet points when helpful.",
111
+ "If you don't know the answer, say 'I don't have enough information to answer that.'"
112
  ],
113
  model=Groq(id="llama3-70b-8192", api_key=api_key),
114
  markdown=True
 
197
 
198
  # Gradio interface
199
  with gr.Blocks(theme='gradio/soft') as CHATBOT:
 
200
  with gr.Row():
201
  with gr.Column(scale=10):
202
  gr.HTML(value="""<div style="color: #FF4500;"><h1>ADWITIYA-</h1> <h1><span style="color: #008000">Custom Manual Chatbot </span></h1></div>""")
 
239
  prompt_html = gr.HTML()
240
  translated_textbox = gr.Textbox(label="Translated Response")
241
 
242
+ def update_chat_and_translate(txt, cross_encoder, history, language_dropdown):
243
+ # Fixed: history is now directly used instead of history_state.value
244
+ if not history:
245
+ history = []
246
 
247
  # Call simple_chat_function
248
+ msg, updated_history, prompt_html_content = simple_chat_function(txt, history, cross_encoder)
249
 
250
  # Translate text
251
+ translated_text = translate_text(language_dropdown, updated_history)
252
 
253
+ return updated_history, prompt_html_content, translated_text
254
 
255
+ # Fixed: Pass chatbot directly instead of history_state
256
+ txt_msg = txt_btn.click(update_chat_and_translate, [txt, cross_encoder, chatbot, language_dropdown], [chatbot, prompt_html, translated_textbox])
257
+ txt_msg = txt.submit(update_chat_and_translate, [txt, cross_encoder, chatbot, language_dropdown], [chatbot, prompt_html, translated_textbox])
258
 
259
  examples = [
260
  'My transhipment cargo is missing',
 
269
  gr.Examples(examples, txt)
270
 
271
  # Launch the Gradio application
272
+ CHATBOT.launch(share=True, debug=True)# import gradio as gr
273
+ # from phi.agent import Agent
274
+ # from phi.model.groq import Groq
275
  # import logging
276
  # from pathlib import Path
277
  # from time import perf_counter
278
  # from sentence_transformers import CrossEncoder
 
 
279
  # import numpy as np
280
  # from os import getenv
281
+ # import requests
282
+ # from jinja2 import Environment, FileSystemLoader
283
  # from backend.semantic_search import table, retriever
 
 
 
284
  # # Bhashini API translation function
285
+ # api_key = getenv('API_KEY', '').strip()
286
+ # user_id = getenv('USER_ID', '').strip()
287
 
288
  # def bhashini_translate(text: str, from_code: str = "en", to_code: str = "hi") -> dict:
289
  # """Translates text from source language to target language using the Bhashini API."""
 
290
  # if not text.strip():
291
  # print('Input text is empty. Please provide valid text for translation.')
292
  # return {"status_code": 400, "message": "Input text is empty", "translated_content": None, "speech_content": None}
293
  # else:
294
+ # print('Input text - ', text)
 
295
  # print(f'Starting translation process from {from_code} to {to_code}...')
296
  # gr.Warning(f'Translating to {to_code}...')
297
 
 
301
  # "userID": user_id,
302
  # "ulcaApiKey": api_key
303
  # }
304
+ # for key, value in headers.items():
305
+ # if not isinstance(value, str) or '\n' in value or '\r' in value:
306
+ # print(f"Invalid header value for {key}: {value}")
307
+ # return {"status_code": 400, "message": f"Invalid header value for {key}", "translated_content": None}
308
+
309
  # payload = {
310
  # "pipelineTasks": [{"taskType": "translation", "config": {"language": {"sourceLanguage": from_code, "targetLanguage": to_code}}}],
311
  # "pipelineRequestConfig": {"pipelineId": "64392f96daac500b55c543cd"}
 
348
  # print(f'Translation successful. Translated content: "{translated_content}"')
349
  # return {"status_code": 200, "message": "Translation successful", "translated_content": translated_content}
350
 
 
351
  # # Existing chatbot functions
352
  # VECTOR_COLUMN_NAME = "vector"
353
  # TEXT_COLUMN_NAME = "text"
 
356
 
357
  # logging.basicConfig(level=logging.INFO)
358
  # logger = logging.getLogger(__name__)
 
 
 
359
 
360
+ # # Set up Jinja2 environment
361
+ # env = Environment(loader=FileSystemLoader(proj_dir / 'templates'))
362
  # template = env.get_template('template.j2')
363
  # template_html = env.get_template('template_html.j2')
364
 
365
+ # # Initialize Groq Agent
366
+ # api_key = getenv("GROQ_API_KEY")
367
+ # if not api_key:
368
+ # gr.Warning("GROQ_API_KEY not found. Set it in 'Repository secrets'.")
369
+ # logger.error("GROQ_API_KEY not found.")
370
+ # api_key = "" # Fallback, but will fail without a key
371
+
372
+ # agent = Agent(
373
+ # name="Customs Assistant",
374
+ # role="You are a helpful assistant for CBIC officers, providing guidance on customs procedures and regulations.",
375
+ # instructions=[
376
+ # "You are an expert in customs regulations and CBIC procedures.",
377
+ # "Provide clear, accurate, and professional explanations.",
378
+ # "Use simple language and examples relevant to customs officers.",
379
+ # "Focus on topics like transhipment, AEO schemes, bonds, penalties, and CFS approvals.",
380
+ # "Structure responses with headings and bullet points when helpful.",
381
+ # "If you don’t know the answer, say 'I don’t have enough information to answer that.'"
382
+ # ],
383
+ # model=Groq(id="llama3-70b-8192", api_key=api_key),
384
+ # markdown=True
385
+ # )
386
+
387
+ # def simple_chat_function(message, history, cross_encoder_choice):
388
+ # """Chat function with semantic search and Groq agent integration"""
389
+ # if not message.strip():
390
+ # return "", history, ""
391
+
392
  # top_rerank = 25
393
  # top_k_rank = 20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
394
 
395
+ # try:
396
+ # start_time = perf_counter()
397
+
398
+ # # Encode query and search documents
399
+ # query_vec = retriever.encode(message)
400
  # documents = table.search(query_vec, vector_column_name=VECTOR_COLUMN_NAME).limit(top_rerank).to_list()
401
  # documents = [doc[TEXT_COLUMN_NAME] for doc in documents]
 
 
 
 
 
 
402
 
403
+ # # Re-rank documents using cross-encoder
404
+ # if cross_encoder_choice == '(FAST) MiniLM-L6v2':
405
+ # cross_encoder_model = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2')
406
+ # elif cross_encoder_choice == '(ACCURATE) BGE reranker':
407
+ # cross_encoder_model = CrossEncoder('BAAI/bge-reranker-base')
408
+ # elif cross_encoder_choice == '(HIGH ACCURATE) ColBERT':
409
+ # gr.Warning('Retrieving using ColBERT.. First time query may take a minute for model to load..pls wait')
410
+ # from ragatouille import RAGPretrainedModel
411
+ # RAG = RAGPretrainedModel.from_pretrained("colbert-ir/colbertv2.0")
412
+ # RAG_db = RAG.from_index('.ragatouille/colbert/indexes/cbseclass10index')
413
+ # documents = [item['content'] for item in RAG_db.search(message, k=top_k_rank)]
414
+ # cross_encoder_model = None # No re-ranking needed for ColBERT
415
+
416
+ # if cross_encoder_model:
417
+ # query_doc_pair = [[message, doc] for doc in documents]
418
+ # cross_scores = cross_encoder_model.predict(query_doc_pair)
419
+ # sim_scores_argsort = list(reversed(np.argsort(cross_scores)))
420
+ # documents = [documents[idx] for idx in sim_scores_argsort[:top_k_rank]]
421
 
422
+ # # Create context from top documents
423
+ # context = "\n\n".join(documents[:10]) if documents else ""
424
+ # context = f"Context information from customs materials:\n{context}\n\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
425
 
426
+ # # Add conversation history for context
427
+ # history_context = ""
428
+ # if history and len(history) > 0:
429
+ # for user_msg, bot_msg in history[-2:]: # Last 2 exchanges
430
+ # if user_msg and bot_msg:
431
+ # history_context += f"Previous Q: {user_msg}\nPrevious A: {bot_msg}\n"
432
 
433
+ # # Create full prompt
434
+ # full_prompt = f"{history_context}{context}Question: {message}\n\nPlease answer the question using the context provided above. If the context doesn't contain relevant information, use your general knowledge about CBIC customs procedures."
435
+
436
+ # # Generate response
437
+ # response = agent.run(full_prompt)
438
+ # response_text = response.content if hasattr(response, 'content') else str(response)
439
+
440
+ # # Add to history
441
+ # history.append([message, response_text])
442
+
443
+ # # Render template with documents and query
444
+ # prompt_html = template_html.render(documents=documents, query=message)
445
+
446
+ # logger.info(f"Response generation took {perf_counter() - start_time:.2f} seconds")
447
+ # return "", history, prompt_html
 
 
448
 
449
+ # except Exception as e:
450
+ # logger.error(f"Error in response generation: {e}")
451
+ # return "", history, f"Error generating response: {str(e)}"
452
+
453
+ # def translate_text(selected_language, history):
454
+ # """Translate the last response in history to the selected language."""
455
  # iso_language_codes = {
456
+ # "Hindi": "hi", "Gom": "gom", "Kannada": "kn", "Dogri": "doi", "Bodo": "brx", "Urdu": "ur",
457
+ # "Tamil": "ta", "Kashmiri": "ks", "Assamese": "as", "Bengali": "bn", "Marathi": "mr",
458
+ # "Sindhi": "sd", "Maithili": "mai", "Punjabi": "pa", "Malayalam": "ml", "Manipuri": "mni",
459
+ # "Telugu": "te", "Sanskrit": "sa", "Nepali": "ne", "Santali": "sat", "Gujarati": "gu", "Odia": "or"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
460
  # }
461
 
462
  # to_code = iso_language_codes[selected_language]
463
+ # response_text = history[-1][1] if history and history[-1][1] else ''
464
+ # print('response_text for translation', response_text)
465
  # translation = bhashini_translate(response_text, to_code=to_code)
466
+ # return translation.get('translated_content', 'Translation failed.')
 
467
 
468
  # # Gradio interface
469
  # with gr.Blocks(theme='gradio/soft') as CHATBOT:
 
503
  # "Sindhi", "Maithili", "Punjabi", "Malayalam", "Manipuri", "Telugu", "Sanskrit", "Nepali", "Santali",
504
  # "Gujarati", "Odia"
505
  # ],
506
+ # value="Hindi",
507
  # label="Select Language for Translation"
508
  # )
509
 
510
  # prompt_html = gr.HTML()
 
511
  # translated_textbox = gr.Textbox(label="Translated Response")
512
+
513
+ # def update_chat_and_translate(txt, cross_encoder, history_state, language_dropdown):
514
+ # history = history_state.value if history_state.value else []
515
  # history.append((txt, ""))
 
516
 
517
+ # # Call simple_chat_function
518
+ # msg, history, prompt_html_content = simple_chat_function(txt, history, cross_encoder)
 
 
 
 
 
 
 
519
 
520
  # # Translate text
521
  # translated_text = translate_text(language_dropdown, history)
522
+
523
+ # return history, prompt_html_content, translated_text
524
 
525
+ # txt_msg = txt_btn.click(update_chat_and_translate, [txt, cross_encoder, history_state, language_dropdown], [chatbot, prompt_html, translated_textbox])
526
+ # txt_msg = txt.submit(update_chat_and_translate, [txt, cross_encoder, history_state, language_dropdown], [chatbot, prompt_html, translated_textbox])
527
 
528
+ # examples = [
529
+ # 'My transhipment cargo is missing',
530
+ # 'Can you explain and tabulate the difference between B-17 bond and a warehousing bond?',
531
+ # 'What are the benefits of the AEO Scheme and eligibility criteria?',
532
+ # 'What are penalties for customs offences?',
533
+ # 'What are penalties for customs officers misusing their powers under the Customs Act?',
534
+ # 'What are eligibility criteria for exemption from cost recovery charges?',
535
+ # 'List in detail the procedure for obtaining new approval for opening a CFS attached to an ICD'
536
+ # ]
537
 
538
  # gr.Examples(examples, txt)
539
 
 
540
  # # Launch the Gradio application
541
+ # CHATBOT.launch(share=True, debug=True)# import requests
542
+ # # import gradio as gr
543
+ # # from ragatouille import RAGPretrainedModel
544
+ # # import logging
545
+ # # from pathlib import Path
546
+ # # from time import perf_counter
547
+ # # from sentence_transformers import CrossEncoder
548
+ # # from huggingface_hub import InferenceClient
549
+ # # from jinja2 import Environment, FileSystemLoader
550
+ # # import numpy as np
551
+ # # from os import getenv
552
+ # # from backend.query_llm import generate_hf, generate_qwen
553
+ # # from backend.semantic_search import table, retriever
554
+ # # from huggingface_hub import InferenceClient
555
+
556
+
557
+ # # # Bhashini API translation function
558
+ # # api_key = getenv('API_KEY')
559
+ # # user_id = getenv('USER_ID')
560
+
561
+ # # def bhashini_translate(text: str, from_code: str = "en", to_code: str = "hi") -> dict:
562
+ # # """Translates text from source language to target language using the Bhashini API."""
563
+
564
+ # # if not text.strip():
565
+ # # print('Input text is empty. Please provide valid text for translation.')
566
+ # # return {"status_code": 400, "message": "Input text is empty", "translated_content": None, "speech_content": None}
567
+ # # else:
568
+ # # print('Input text - ',text)
569
+ # # print(f'Starting translation process from {from_code} to {to_code}...')
570
+ # # print(f'Starting translation process from {from_code} to {to_code}...')
571
+ # # gr.Warning(f'Translating to {to_code}...')
572
+
573
+ # # url = 'https://meity-auth.ulcacontrib.org/ulca/apis/v0/model/getModelsPipeline'
574
+ # # headers = {
575
+ # # "Content-Type": "application/json",
576
+ # # "userID": user_id,
577
+ # # "ulcaApiKey": api_key
578
+ # # }
579
+ # # payload = {
580
+ # # "pipelineTasks": [{"taskType": "translation", "config": {"language": {"sourceLanguage": from_code, "targetLanguage": to_code}}}],
581
+ # # "pipelineRequestConfig": {"pipelineId": "64392f96daac500b55c543cd"}
582
+ # # }
583
+
584
+ # # print('Sending initial request to get the pipeline...')
585
+ # # response = requests.post(url, json=payload, headers=headers)
586
+
587
+ # # if response.status_code != 200:
588
+ # # print(f'Error in initial request: {response.status_code}')
589
+ # # return {"status_code": response.status_code, "message": "Error in translation request", "translated_content": None}
590
+
591
+ # # print('Initial request successful, processing response...')
592
+ # # response_data = response.json()
593
+ # # service_id = response_data["pipelineResponseConfig"][0]["config"][0]["serviceId"]
594
+ # # callback_url = response_data["pipelineInferenceAPIEndPoint"]["callbackUrl"]
595
+
596
+ # # print(f'Service ID: {service_id}, Callback URL: {callback_url}')
597
+
598
+ # # headers2 = {
599
+ # # "Content-Type": "application/json",
600
+ # # response_data["pipelineInferenceAPIEndPoint"]["inferenceApiKey"]["name"]: response_data["pipelineInferenceAPIEndPoint"]["inferenceApiKey"]["value"]
601
+ # # }
602
+ # # compute_payload = {
603
+ # # "pipelineTasks": [{"taskType": "translation", "config": {"language": {"sourceLanguage": from_code, "targetLanguage": to_code}, "serviceId": service_id}}],
604
+ # # "inputData": {"input": [{"source": text}], "audio": [{"audioContent": None}]}
605
+ # # }
606
+
607
+ # # print(f'Sending translation request with text: "{text}"')
608
+ # # compute_response = requests.post(callback_url, json=compute_payload, headers=headers2)
609
+
610
+ # # if compute_response.status_code != 200:
611
+ # # print(f'Error in translation request: {compute_response.status_code}')
612
+ # # return {"status_code": compute_response.status_code, "message": "Error in translation", "translated_content": None}
613
+
614
+ # # print('Translation request successful, processing translation...')
615
+ # # compute_response_data = compute_response.json()
616
+ # # translated_content = compute_response_data["pipelineResponse"][0]["output"][0]["target"]
617
+
618
+ # # print(f'Translation successful. Translated content: "{translated_content}"')
619
+ # # return {"status_code": 200, "message": "Translation successful", "translated_content": translated_content}
620
+
621
+
622
+ # # # Existing chatbot functions
623
+ # # VECTOR_COLUMN_NAME = "vector"
624
+ # # TEXT_COLUMN_NAME = "text"
625
+ # # HF_TOKEN = getenv("HUGGING_FACE_HUB_TOKEN")
626
+ # # proj_dir = Path(__file__).parent
627
+
628
+ # # logging.basicConfig(level=logging.INFO)
629
+ # # logger = logging.getLogger(__name__)
630
+ # # client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1", token=HF_TOKEN)
631
+ # # proj_dir = Path(__file__).parent
632
+ # # env = Environment(loader=FileSystemLoader(proj_dir / 'templates'))
633
+
634
+ # # template = env.get_template('template.j2')
635
+ # # template_html = env.get_template('template_html.j2')
636
+
637
+ # # # def add_text(history, text):
638
+ # # # history = [] if history is None else history
639
+ # # # history = history + [(text, None)]
640
+ # # # return history, gr.Textbox(value="", interactive=False)
641
+
642
+ # # def bot(history, cross_encoder):
643
+
644
+ # # top_rerank = 25
645
+ # # top_k_rank = 20
646
+ # # query = history[-1][0] if history else ''
647
+ # # print('\nQuery: ',query )
648
+ # # print('\nHistory:',history)
649
+ # # if not query:
650
+ # # gr.Warning("Please submit a non-empty string as a prompt")
651
+ # # raise ValueError("Empty string was submitted")
652
+
653
+ # # logger.warning('Retrieving documents...')
654
+
655
+ # # if cross_encoder == '(HIGH ACCURATE) ColBERT':
656
+ # # gr.Warning('Retrieving using ColBERT.. First time query will take a minute for model to load..pls wait')
657
+ # # RAG = RAGPretrainedModel.from_pretrained("colbert-ir/colbertv2.0")
658
+ # # RAG_db = RAG.from_index('.ragatouille/colbert/indexes/cbseclass10index')
659
+ # # documents_full = RAG_db.search(query, k=top_k_rank)
660
+
661
+ # # documents = [item['content'] for item in documents_full]
662
+ # # prompt = template.render(documents=documents, query=query)
663
+ # # prompt_html = template_html.render(documents=documents, query=query)
664
+
665
+ # # generate_fn = generate_hf
666
+
667
+ # # history[-1][1] = ""
668
+ # # for character in generate_fn(prompt, history[:-1]):
669
+ # # history[-1][1] = character
670
+ # # yield history, prompt_html
671
+ # # else:
672
+ # # document_start = perf_counter()
673
+
674
+ # # query_vec = retriever.encode(query)
675
+ # # doc1 = table.search(query_vec, vector_column_name=VECTOR_COLUMN_NAME).limit(top_k_rank)
676
+
677
+ # # documents = table.search(query_vec, vector_column_name=VECTOR_COLUMN_NAME).limit(top_rerank).to_list()
678
+ # # documents = [doc[TEXT_COLUMN_NAME] for doc in documents]
679
+
680
+ # # query_doc_pair = [[query, doc] for doc in documents]
681
+ # # if cross_encoder == '(FAST) MiniLM-L6v2':
682
+ # # cross_encoder1 = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2')
683
+ # # elif cross_encoder == '(ACCURATE) BGE reranker':
684
+ # # cross_encoder1 = CrossEncoder('BAAI/bge-reranker-base')
685
+
686
+ # # cross_scores = cross_encoder1.predict(query_doc_pair)
687
+ # # sim_scores_argsort = list(reversed(np.argsort(cross_scores)))
688
+
689
+ # # documents = [documents[idx] for idx in sim_scores_argsort[:top_k_rank]]
690
+
691
+ # # document_time = perf_counter() - document_start
692
+
693
+ # # prompt = template.render(documents=documents, query=query)
694
+ # # prompt_html = template_html.render(documents=documents, query=query)
695
+
696
+ # # #generate_fn = generate_hf
697
+ # # generate_fn=generate_qwen
698
+ # # # Create a new history entry instead of modifying the tuple directly
699
+ # # new_history = history[:-1] + [ (prompt, "") ] # query replaced with prompt
700
+ # # output=''
701
+ # # # for character in generate_fn(prompt, history[:-1]):
702
+ # # # #new_history[-1] = (query, character)
703
+ # # # output+=character
704
+ # # output=generate_fn(prompt, history[:-1])
705
+
706
+ # # print('Output:',output)
707
+ # # new_history[-1] = (prompt, output) #query replaced with prompt
708
+ # # print('New History',new_history)
709
+ # # #print('prompt html',prompt_html)# Update the last tuple with new text
710
+
711
+ # # history_list = list(history[-1])
712
+ # # history_list[1] = output # Store the generated output as the reply half of the last (query, reply) pair
713
+ # # # Update the history with the modified list converted back to a tuple
714
+ # # history[-1] = tuple(history_list)
715
+
716
+ # # #history[-1][1] = character
717
+ # # # yield new_history, prompt_html
718
+ # # yield history, prompt_html
719
+ # # # new_history,prompt_html
720
+ # # # history[-1][1] = ""
721
+ # # # for character in generate_fn(prompt, history[:-1]):
722
+ # # # history[-1][1] = character
723
+ # # # yield history, prompt_html
724
+
725
+ # # #def translate_text(response_text, selected_language):
726
+
727
+ # # def translate_text(selected_language,history):
728
+
729
+ # # iso_language_codes = {
730
+ # # "Hindi": "hi",
731
+ # # "Gom": "gom",
732
+ # # "Kannada": "kn",
733
+ # # "Dogri": "doi",
734
+ # # "Bodo": "brx",
735
+ # # "Urdu": "ur",
736
+ # # "Tamil": "ta",
737
+ # # "Kashmiri": "ks",
738
+ # # "Assamese": "as",
739
+ # # "Bengali": "bn",
740
+ # # "Marathi": "mr",
741
+ # # "Sindhi": "sd",
742
+ # # "Maithili": "mai",
743
+ # # "Punjabi": "pa",
744
+ # # "Malayalam": "ml",
745
+ # # "Manipuri": "mni",
746
+ # # "Telugu": "te",
747
+ # # "Sanskrit": "sa",
748
+ # # "Nepali": "ne",
749
+ # # "Santali": "sat",
750
+ # # "Gujarati": "gu",
751
+ # # "Odia": "or"
752
+ # # }
753
+
754
+ # # to_code = iso_language_codes[selected_language]
755
+ # # response_text = history[-1][1] if history else ''
756
+ # # print('response_text for translation',response_text)
757
+ # # translation = bhashini_translate(response_text, to_code=to_code)
758
+ # # return translation['translated_content']
759
+
760
+
761
+ # # # Gradio interface
762
+ # # with gr.Blocks(theme='gradio/soft') as CHATBOT:
763
+ # # history_state = gr.State([])
764
+ # # with gr.Row():
765
+ # # with gr.Column(scale=10):
766
+ # # gr.HTML(value="""<div style="color: #FF4500;"><h1>ADWITIYA-</h1> <h1><span style="color: #008000">Custom Manual Chatbot </span></h1></div>""")
767
+ # # gr.HTML(value=f"""<p style="font-family: sans-serif; font-size: 16px;">Using GenAI for CBIC Capacity Building - A free chat bot developed by National Customs Targeting Center using Open source LLMs for CBIC Officers</p>""")
768
+ # # gr.HTML(value=f"""<p style="font-family: Arial, sans-serif; font-size: 14px;">Developed by NCTC,Mumbai. Suggestions may be sent to <a href="mailto:[email protected]" style="color: #00008B; font-style: italic;">[email protected]</a>.</p>""")
769
+
770
+ # # with gr.Column(scale=3):
771
+ # # gr.Image(value='logo.png', height=200, width=200)
772
+
773
+ # # chatbot = gr.Chatbot(
774
+ # # [],
775
+ # # elem_id="chatbot",
776
+ # # avatar_images=('https://aui.atlassian.com/aui/8.8/docs/images/avatar-person.svg',
777
+ # # 'https://huggingface.co/datasets/huggingface/brand-assets/resolve/main/hf-logo.svg'),
778
+ # # bubble_full_width=False,
779
+ # # show_copy_button=True,
780
+ # # show_share_button=True,
781
+ # # )
782
+
783
+ # # with gr.Row():
784
+ # # txt = gr.Textbox(
785
+ # # scale=3,
786
+ # # show_label=False,
787
+ # # placeholder="Enter text and press enter",
788
+ # # container=False,
789
+ # # )
790
+ # # txt_btn = gr.Button(value="Submit text", scale=1)
791
+
792
+ # # cross_encoder = gr.Radio(choices=['(FAST) MiniLM-L6v2', '(ACCURATE) BGE reranker', '(HIGH ACCURATE) ColBERT'], value='(ACCURATE) BGE reranker', label="Embeddings", info="Only First query to Colbert may take little time)")
793
+ # # language_dropdown = gr.Dropdown(
794
+ # # choices=[
795
+ # # "Hindi", "Gom", "Kannada", "Dogri", "Bodo", "Urdu", "Tamil", "Kashmiri", "Assamese", "Bengali", "Marathi",
796
+ # # "Sindhi", "Maithili", "Punjabi", "Malayalam", "Manipuri", "Telugu", "Sanskrit", "Nepali", "Santali",
797
+ # # "Gujarati", "Odia"
798
+ # # ],
799
+ # # value="Hindi", # default to Hindi
800
+ # # label="Select Language for Translation"
801
+ # # )
802
+
803
+ # # prompt_html = gr.HTML()
804
+
805
+ # # translated_textbox = gr.Textbox(label="Translated Response")
806
+ # # def update_history_and_translate(txt, cross_encoder, history_state, language_dropdown):
807
+ # # print('History state',history_state)
808
+ # # history = history_state
809
+ # # history.append((txt, ""))
810
+ # # #history_state.value=(history)
811
+
812
+ # # # Call bot function
813
+ # # # bot_output = list(bot(history, cross_encoder))
814
+ # # bot_output = next(bot(history, cross_encoder))
815
+ # # print('bot_output',bot_output)
816
+ # # #history, prompt_html = bot_output[-1]
817
+ # # history, prompt_html = bot_output
818
+ # # print('History',history)
819
+ # # # Update the history state
820
+ # # history_state[:] = history
821
+
822
+ # # # Translate text
823
+ # # translated_text = translate_text(language_dropdown, history)
824
+ # # return history, prompt_html, translated_text
825
+
826
+ # # txt_msg = txt_btn.click(update_history_and_translate, [txt, cross_encoder, history_state, language_dropdown], [chatbot, prompt_html, translated_textbox])
827
+ # # txt_msg = txt.submit(update_history_and_translate, [txt, cross_encoder, history_state, language_dropdown], [chatbot, prompt_html, translated_textbox])
828
+
829
+ # # examples = ['My transhipment cargo is missing','can u explain and tabulate difference between b 17 bond and a warehousing bond',
830
+ # # 'What are benefits of the AEO Scheme and eligibility criteria?',
831
+ # # 'What are penalties for customs offences? ', 'what are penalties to customs officers misusing their powers under customs act?','What are eligibility criteria for exemption from cost recovery charges','list in detail what is procedure for obtaining new approval for openeing a CFS attached to an ICD']
832
+
833
+ # # gr.Examples(examples, txt)
834
+
835
+
836
+ # # # Launch the Gradio application
837
+ # # CHATBOT.launch(share=True,debug=True)
838