NCTCMumbai committed on
Commit
5ad9673
·
verified ·
1 Parent(s): 386373d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +415 -145
app.py CHANGED
@@ -1,32 +1,26 @@
1
- import requests
2
  import gradio as gr
3
- from ragatouille import RAGPretrainedModel
 
4
  import logging
5
  from pathlib import Path
6
  from time import perf_counter
7
  from sentence_transformers import CrossEncoder
8
- from huggingface_hub import InferenceClient
9
- from jinja2 import Environment, FileSystemLoader
10
  import numpy as np
11
  from os import getenv
12
- from backend.query_llm import generate_hf, generate_qwen
13
- from backend.semantic_search import table, retriever
14
- from huggingface_hub import InferenceClient
15
-
16
 
17
  # Bhashini API translation function
18
- api_key = getenv('API_KEY')
19
- user_id = getenv('USER_ID')
20
 
21
  def bhashini_translate(text: str, from_code: str = "en", to_code: str = "hi") -> dict:
22
  """Translates text from source language to target language using the Bhashini API."""
23
-
24
  if not text.strip():
25
  print('Input text is empty. Please provide valid text for translation.')
26
  return {"status_code": 400, "message": "Input text is empty", "translated_content": None, "speech_content": None}
27
  else:
28
- print('Input text - ',text)
29
- print(f'Starting translation process from {from_code} to {to_code}...')
30
  print(f'Starting translation process from {from_code} to {to_code}...')
31
  gr.Warning(f'Translating to {to_code}...')
32
 
@@ -36,6 +30,11 @@ def bhashini_translate(text: str, from_code: str = "en", to_code: str = "hi") ->
36
  "userID": user_id,
37
  "ulcaApiKey": api_key
38
  }
 
 
 
 
 
39
  payload = {
40
  "pipelineTasks": [{"taskType": "translation", "config": {"language": {"sourceLanguage": from_code, "targetLanguage": to_code}}}],
41
  "pipelineRequestConfig": {"pipelineId": "64392f96daac500b55c543cd"}
@@ -78,7 +77,6 @@ def bhashini_translate(text: str, from_code: str = "en", to_code: str = "hi") ->
78
  print(f'Translation successful. Translated content: "{translated_content}"')
79
  return {"status_code": 200, "message": "Translation successful", "translated_content": translated_content}
80
 
81
-
82
  # Existing chatbot functions
83
  VECTOR_COLUMN_NAME = "vector"
84
  TEXT_COLUMN_NAME = "text"
@@ -87,135 +85,114 @@ proj_dir = Path(__file__).parent
87
 
88
  logging.basicConfig(level=logging.INFO)
89
  logger = logging.getLogger(__name__)
90
- client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1", token=HF_TOKEN)
91
- env = Environment(loader=FileSystemLoader(proj_dir / 'templates'))
92
 
 
 
93
  template = env.get_template('template.j2')
94
  template_html = env.get_template('template_html.j2')
95
 
96
- # def add_text(history, text):
97
- # history = [] if history is None else history
98
- # history = history + [(text, None)]
99
- # return history, gr.Textbox(value="", interactive=False)
 
 
100
 
101
- def bot(history, cross_encoder):
 
 
 
 
 
 
 
 
 
 
 
 
 
102
 
 
 
 
 
 
103
  top_rerank = 25
104
  top_k_rank = 20
105
- query = history[-1][0] if history else ''
106
- print('\nQuery: ',query )
107
- print('\nHistory:',history)
108
- if not query:
109
- gr.Warning("Please submit a non-empty string as a prompt")
110
- raise ValueError("Empty string was submitted")
111
-
112
- logger.warning('Retrieving documents...')
113
-
114
- if cross_encoder == '(HIGH ACCURATE) ColBERT':
115
- gr.Warning('Retrieving using ColBERT.. First time query will take a minute for model to load..pls wait')
116
- RAG = RAGPretrainedModel.from_pretrained("colbert-ir/colbertv2.0")
117
- RAG_db = RAG.from_index('.ragatouille/colbert/indexes/cbseclass10index')
118
- documents_full = RAG_db.search(query, k=top_k_rank)
119
-
120
- documents = [item['content'] for item in documents_full]
121
- prompt = template.render(documents=documents, query=query)
122
- prompt_html = template_html.render(documents=documents, query=query)
123
-
124
- generate_fn = generate_hf
125
-
126
- history[-1][1] = ""
127
- for character in generate_fn(prompt, history[:-1]):
128
- history[-1][1] = character
129
- yield history, prompt_html
130
- else:
131
- document_start = perf_counter()
132
-
133
- query_vec = retriever.encode(query)
134
- doc1 = table.search(query_vec, vector_column_name=VECTOR_COLUMN_NAME).limit(top_k_rank)
135
 
 
 
 
 
 
136
  documents = table.search(query_vec, vector_column_name=VECTOR_COLUMN_NAME).limit(top_rerank).to_list()
137
  documents = [doc[TEXT_COLUMN_NAME] for doc in documents]
138
-
139
- query_doc_pair = [[query, doc] for doc in documents]
140
- if cross_encoder == '(FAST) MiniLM-L6v2':
141
- cross_encoder1 = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2')
142
- elif cross_encoder == '(ACCURATE) BGE reranker':
143
- cross_encoder1 = CrossEncoder('BAAI/bge-reranker-base')
144
 
145
- cross_scores = cross_encoder1.predict(query_doc_pair)
146
- sim_scores_argsort = list(reversed(np.argsort(cross_scores)))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
147
 
148
- documents = [documents[idx] for idx in sim_scores_argsort[:top_k_rank]]
149
-
150
- document_time = perf_counter() - document_start
151
-
152
- prompt = template.render(documents=documents, query=query)
153
- prompt_html = template_html.render(documents=documents, query=query)
154
-
155
- #generate_fn = generate_hf
156
- generate_fn=generate_qwen
157
- # Create a new history entry instead of modifying the tuple directly
158
- new_history = history[:-1] + [ (prompt, "") ] # query replaced prompt
159
- output=''
160
- # for character in generate_fn(prompt, history[:-1]):
161
- # #new_history[-1] = (query, character)
162
- # output+=character
163
- output=generate_fn(prompt, history[:-1])
164
 
165
- print('Output:',output)
166
- new_history[-1] = (prompt, output) #query replaced with prompt
167
- print('New History',new_history)
168
- #print('prompt html',prompt_html)# Update the last tuple with new text
 
 
169
 
170
- history_list = list(history[-1])
171
- history_list[1] = output # Assuming `character` is what you want to assign
172
- # Update the history with the modified list converted back to a tuple
173
- history[-1] = tuple(history_list)
174
-
175
- #history[-1][1] = character
176
- # yield new_history, prompt_html
177
- yield history, prompt_html
178
- # new_history,prompt_html
179
- # history[-1][1] = ""
180
- # for character in generate_fn(prompt, history[:-1]):
181
- # history[-1][1] = character
182
- # yield history, prompt_html
183
-
184
- #def translate_text(response_text, selected_language):
185
-
186
- def translate_text(selected_language,history):
187
 
 
 
 
 
 
 
188
  iso_language_codes = {
189
- "Hindi": "hi",
190
- "Gom": "gom",
191
- "Kannada": "kn",
192
- "Dogri": "doi",
193
- "Bodo": "brx",
194
- "Urdu": "ur",
195
- "Tamil": "ta",
196
- "Kashmiri": "ks",
197
- "Assamese": "as",
198
- "Bengali": "bn",
199
- "Marathi": "mr",
200
- "Sindhi": "sd",
201
- "Maithili": "mai",
202
- "Punjabi": "pa",
203
- "Malayalam": "ml",
204
- "Manipuri": "mni",
205
- "Telugu": "te",
206
- "Sanskrit": "sa",
207
- "Nepali": "ne",
208
- "Santali": "sat",
209
- "Gujarati": "gu",
210
- "Odia": "or"
211
  }
212
 
213
  to_code = iso_language_codes[selected_language]
214
- response_text = history[-1][1] if history else ''
215
- print('response_text for translation',response_text)
216
  translation = bhashini_translate(response_text, to_code=to_code)
217
- return translation['translated_content']
218
-
219
 
220
  # Gradio interface
221
  with gr.Blocks(theme='gradio/soft') as CHATBOT:
@@ -255,43 +232,336 @@ with gr.Blocks(theme='gradio/soft') as CHATBOT:
255
  "Sindhi", "Maithili", "Punjabi", "Malayalam", "Manipuri", "Telugu", "Sanskrit", "Nepali", "Santali",
256
  "Gujarati", "Odia"
257
  ],
258
- value="Hindi", # default to Hindi
259
  label="Select Language for Translation"
260
  )
261
 
262
  prompt_html = gr.HTML()
263
-
264
  translated_textbox = gr.Textbox(label="Translated Response")
265
- def update_history_and_translate(txt, cross_encoder, history_state, language_dropdown):
266
- print('History state',history_state)
267
- history = history_state
268
  history.append((txt, ""))
269
- #history_state.value=(history)
270
 
271
- # Call bot function
272
- # bot_output = list(bot(history, cross_encoder))
273
- bot_output = next(bot(history, cross_encoder))
274
- print('bot_output',bot_output)
275
- #history, prompt_html = bot_output[-1]
276
- history, prompt_html = bot_output
277
- print('History',history)
278
- # Update the history state
279
- history_state[:] = history
280
 
281
  # Translate text
282
  translated_text = translate_text(language_dropdown, history)
283
- return history, prompt_html, translated_text
 
284
 
285
- txt_msg = txt_btn.click(update_history_and_translate, [txt, cross_encoder, history_state, language_dropdown], [chatbot, prompt_html, translated_textbox])
286
- txt_msg = txt.submit(update_history_and_translate, [txt, cross_encoder, history_state, language_dropdown], [chatbot, prompt_html, translated_textbox])
287
 
288
- examples = ['My transhipment cargo is missing','can u explain and tabulate difference between b 17 bond and a warehousing bond',
289
- 'What are benefits of the AEO Scheme and eligibility criteria?',
290
- 'What are penalties for customs offences? ', 'what are penalties to customs officers misusing their powers under customs act?','What are eligibility criteria for exemption from cost recovery charges','list in detail what is procedure for obtaining new approval for openeing a CFS attached to an ICD']
 
 
 
 
 
 
291
 
292
  gr.Examples(examples, txt)
293
 
294
-
295
  # Launch the Gradio application
296
- CHATBOT.launch(share=True,debug=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
297
 
 
 
1
  import gradio as gr
2
+ from phi.agent import Agent
3
+ from phi.model.groq import Groq
4
  import logging
5
  from pathlib import Path
6
  from time import perf_counter
7
  from sentence_transformers import CrossEncoder
 
 
8
  import numpy as np
9
  from os import getenv
10
+ import requests
11
+ from jinja2 import Environment, FileSystemLoader
 
 
12
 
13
  # Bhashini API translation function
14
+ api_key = getenv('API_KEY', '').strip()
15
+ user_id = getenv('USER_ID', '').strip()
16
 
17
  def bhashini_translate(text: str, from_code: str = "en", to_code: str = "hi") -> dict:
18
  """Translates text from source language to target language using the Bhashini API."""
 
19
  if not text.strip():
20
  print('Input text is empty. Please provide valid text for translation.')
21
  return {"status_code": 400, "message": "Input text is empty", "translated_content": None, "speech_content": None}
22
  else:
23
+ print('Input text - ', text)
 
24
  print(f'Starting translation process from {from_code} to {to_code}...')
25
  gr.Warning(f'Translating to {to_code}...')
26
 
 
30
  "userID": user_id,
31
  "ulcaApiKey": api_key
32
  }
33
+ for key, value in headers.items():
34
+ if not isinstance(value, str) or '\n' in value or '\r' in value:
35
+ print(f"Invalid header value for {key}: {value}")
36
+ return {"status_code": 400, "message": f"Invalid header value for {key}", "translated_content": None}
37
+
38
  payload = {
39
  "pipelineTasks": [{"taskType": "translation", "config": {"language": {"sourceLanguage": from_code, "targetLanguage": to_code}}}],
40
  "pipelineRequestConfig": {"pipelineId": "64392f96daac500b55c543cd"}
 
77
  print(f'Translation successful. Translated content: "{translated_content}"')
78
  return {"status_code": 200, "message": "Translation successful", "translated_content": translated_content}
79
 
 
80
  # Existing chatbot functions
81
  VECTOR_COLUMN_NAME = "vector"
82
  TEXT_COLUMN_NAME = "text"
 
85
 
86
  logging.basicConfig(level=logging.INFO)
87
  logger = logging.getLogger(__name__)
 
 
88
 
89
+ # Set up Jinja2 environment
90
+ env = Environment(loader=FileSystemLoader(proj_dir / 'templates'))
91
  template = env.get_template('template.j2')
92
  template_html = env.get_template('template_html.j2')
93
 
94
# Initialize the Groq-hosted LLM agent.
#
# The Groq key lives in its own name on purpose: `api_key` is already the
# Bhashini (ULCA) key that bhashini_translate() reads when it builds its
# request headers. Rebinding `api_key` here would send the Groq secret to the
# Bhashini API as "ulcaApiKey" and break translation authentication.
groq_api_key = getenv("GROQ_API_KEY", "").strip()
if not groq_api_key:
    gr.Warning("GROQ_API_KEY not found. Set it in 'Repository secrets'.")
    logger.error("GROQ_API_KEY not found.")
    # The agent is still constructed with an empty key so the module loads;
    # requests to the model are expected to fail until a key is configured.

agent = Agent(
    name="Customs Assistant",
    role="You are a helpful assistant for CBIC officers, providing guidance on customs procedures and regulations.",
    instructions=[
        "You are an expert in customs regulations and CBIC procedures.",
        "Provide clear, accurate, and professional explanations.",
        "Use simple language and examples relevant to customs officers.",
        "Focus on topics like transhipment, AEO schemes, bonds, penalties, and CFS approvals.",
        "Structure responses with headings and bullet points when helpful.",
        "If you don’t know the answer, say 'I don’t have enough information to answer that.'",
    ],
    model=Groq(id="llama3-70b-8192", api_key=groq_api_key),
    markdown=True,
)
115
 
116
# Maps the re-ranker labels offered in the UI to their cross-encoder model ids.
_RERANKER_MODELS = {
    '(FAST) MiniLM-L6v2': 'cross-encoder/ms-marco-MiniLM-L-6-v2',
    '(ACCURATE) BGE reranker': 'BAAI/bge-reranker-base',
}


def _has_pending_entry(history, message):
    """Return True when the last history row is an unanswered row for ``message``."""
    if not history:
        return False
    last = history[-1]
    return len(last) == 2 and last[0] == message and not last[1]


def simple_chat_function(message, history, cross_encoder_choice):
    """Answer ``message`` with retrieved documents and the Groq agent.

    Args:
        message: The user's question. ``None``, empty and whitespace-only
            input is ignored.
        history: List of ``[user_message, bot_message]`` pairs. It is updated
            in place with the new exchange.
        cross_encoder_choice: Retrieval strategy label. The two cross-encoder
            labels re-rank vector-search hits; the ColBERT label searches the
            ColBERT index instead. Any other value falls back to the plain
            vector-search order.

    Returns:
        A 3-tuple ``("", history, html)`` where ``html`` is the rendered
        document template, an empty string when the message was blank, or an
        error message when generation failed.
    """
    if not message or not message.strip():
        return "", history, ""

    top_rerank = 25
    top_k_rank = 20

    try:
        start_time = perf_counter()

        if cross_encoder_choice == '(HIGH ACCURATE) ColBERT':
            # ColBERT retrieves and ranks on its own, so the vector search and
            # the cross-encoder re-ranking are skipped entirely.
            gr.Warning('Retrieving using ColBERT.. First time query may take a minute for model to load..pls wait')
            from ragatouille import RAGPretrainedModel
            RAG = RAGPretrainedModel.from_pretrained("colbert-ir/colbertv2.0")
            RAG_db = RAG.from_index('.ragatouille/colbert/indexes/cbseclass10index')
            documents = [item['content'] for item in RAG_db.search(message, k=top_k_rank)]
        else:
            # app.py has no module-level import for these two names, so they
            # are imported here; without this the lookup raises NameError.
            from backend.semantic_search import retriever, table

            query_vec = retriever.encode(message)
            hits = table.search(query_vec, vector_column_name=VECTOR_COLUMN_NAME).limit(top_rerank).to_list()
            documents = [hit[TEXT_COLUMN_NAME] for hit in hits]

            reranker_name = _RERANKER_MODELS.get(cross_encoder_choice)
            if reranker_name and documents:
                cross_encoder_model = CrossEncoder(reranker_name)
                query_doc_pair = [[message, doc] for doc in documents]
                cross_scores = cross_encoder_model.predict(query_doc_pair)
                # Highest score first.
                ranked_indices = np.argsort(cross_scores)[::-1]
                documents = [documents[idx] for idx in ranked_indices[:top_k_rank]]
            else:
                # Unknown strategy or nothing to rank: keep vector-search order.
                documents = documents[:top_k_rank]

        # Create context from the top documents
        context = "\n\n".join(documents[:10]) if documents else ""
        context = f"Context information from customs materials:\n{context}\n\n"

        # A caller may already have appended an unanswered row for this
        # message; it is filled in below rather than duplicated, and it is
        # left out of the "previous exchanges" context.
        pending = _has_pending_entry(history, message)
        prior_turns = history[:-1] if pending else history

        history_context = "".join(
            f"Previous Q: {user_msg}\nPrevious A: {bot_msg}\n"
            for user_msg, bot_msg in prior_turns[-2:]  # Last 2 exchanges
            if user_msg and bot_msg
        )

        # Create full prompt
        full_prompt = f"{history_context}{context}Question: {message}\n\nPlease answer the question using the context provided above. If the context doesn't contain relevant information, use your general knowledge about CBIC customs procedures."

        # Generate response
        response = agent.run(full_prompt)
        response_text = response.content if hasattr(response, 'content') else str(response)

        # Record the exchange in place so the caller's list sees it
        if pending:
            history[-1] = [message, response_text]
        else:
            history.append([message, response_text])

        # Render template with documents and query
        prompt_html = template_html.render(documents=documents, query=message)

        logger.info("Response generation took %.2f seconds", perf_counter() - start_time)
        return "", history, prompt_html

    except Exception as e:
        # Top-level boundary for the UI: log the traceback and surface the
        # error text instead of crashing the Gradio handler.
        logger.exception("Error in response generation")
        return "", history, f"Error generating response: {e}"
181
+
182
# Language names offered in the UI mapped to the codes passed to Bhashini.
_ISO_LANGUAGE_CODES = {
    "Hindi": "hi", "Gom": "gom", "Kannada": "kn", "Dogri": "doi", "Bodo": "brx", "Urdu": "ur",
    "Tamil": "ta", "Kashmiri": "ks", "Assamese": "as", "Bengali": "bn", "Marathi": "mr",
    "Sindhi": "sd", "Maithili": "mai", "Punjabi": "pa", "Malayalam": "ml", "Manipuri": "mni",
    "Telugu": "te", "Sanskrit": "sa", "Nepali": "ne", "Santali": "sat", "Gujarati": "gu", "Odia": "or",
}


def translate_text(selected_language, history):
    """Translate the last response in ``history`` to ``selected_language``.

    Args:
        selected_language: A language name from the translation dropdown.
        history: List of ``(user_message, bot_message)`` pairs.

    Returns:
        The translated text; ``''`` when there is no response to translate;
        ``'Translation failed.'`` when the language is not supported or the
        translation service returned no content.
    """
    to_code = _ISO_LANGUAGE_CODES.get(selected_language)
    if to_code is None:
        # Covers a cleared dropdown (None) as well as unknown names.
        print(f'Unsupported translation language: {selected_language!r}')
        return 'Translation failed.'

    response_text = history[-1][1] if history and history[-1][1] else ''
    print('response_text for translation', response_text)
    if not response_text.strip():
        # Nothing to translate, so skip the remote call.
        return ''

    translation = bhashini_translate(response_text, to_code=to_code)
    # bhashini_translate always sets 'translated_content' (None on failure),
    # so a .get() default would never apply; use `or` for the fallback.
    return translation.get('translated_content') or 'Translation failed.'
 
196
 
197
  # Gradio interface
198
  with gr.Blocks(theme='gradio/soft') as CHATBOT:
 
232
  "Sindhi", "Maithili", "Punjabi", "Malayalam", "Manipuri", "Telugu", "Sanskrit", "Nepali", "Santali",
233
  "Gujarati", "Odia"
234
  ],
235
+ value="Hindi",
236
  label="Select Language for Translation"
237
  )
238
 
239
  prompt_html = gr.HTML()
 
240
  translated_textbox = gr.Textbox(label="Translated Response")
241
+
242
def update_chat_and_translate(txt, cross_encoder, history_state, language_dropdown):
    """Handle a submitted message: generate a reply, then translate it.

    Args:
        txt: The text the user submitted.
        cross_encoder: The selected retrieval/re-ranking option.
        history_state: The conversation so far. Gradio passes the value held
            by the State component (a list), not the component itself, so it
            has no ``.value`` attribute.
        language_dropdown: The selected target language name.

    Returns:
        ``(history, prompt_html_content, translated_text)`` for the chatbot,
        the HTML panel and the translation textbox.
    """
    # Keep working on the very same list object (even when it is empty): the
    # state is not among the event outputs, so the conversation only persists
    # between calls through in-place updates to this list.
    history = history_state if history_state is not None else []
    turns_before = len(history)

    # simple_chat_function records the exchange itself; adding a placeholder
    # row here first would leave a duplicate, unanswered row in the chat.
    _, history, prompt_html_content = simple_chat_function(txt, history, cross_encoder)

    # Translate only when a new reply was produced; otherwise the previous
    # answer would be sent for translation again.
    if len(history) > turns_before:
        translated_text = translate_text(language_dropdown, history)
    else:
        translated_text = ""

    return history, prompt_html_content, translated_text
253
 
254
+ txt_msg = txt_btn.click(update_chat_and_translate, [txt, cross_encoder, history_state, language_dropdown], [chatbot, prompt_html, translated_textbox])
255
+ txt_msg = txt.submit(update_chat_and_translate, [txt, cross_encoder, history_state, language_dropdown], [chatbot, prompt_html, translated_textbox])
256
 
257
+ examples = [
258
+ 'My transhipment cargo is missing',
259
+ 'Can you explain and tabulate the difference between B-17 bond and a warehousing bond?',
260
+ 'What are the benefits of the AEO Scheme and eligibility criteria?',
261
+ 'What are penalties for customs offences?',
262
+ 'What are penalties for customs officers misusing their powers under the Customs Act?',
263
+ 'What are eligibility criteria for exemption from cost recovery charges?',
264
+ 'List in detail the procedure for obtaining new approval for opening a CFS attached to an ICD'
265
+ ]
266
 
267
  gr.Examples(examples, txt)
268
 
 
269
  # Launch the Gradio application
270
+ CHATBOT.launch(share=True, debug=True)# import requests
271
+ # import gradio as gr
272
+ # from ragatouille import RAGPretrainedModel
273
+ # import logging
274
+ # from pathlib import Path
275
+ # from time import perf_counter
276
+ # from sentence_transformers import CrossEncoder
277
+ # from huggingface_hub import InferenceClient
278
+ # from jinja2 import Environment, FileSystemLoader
279
+ # import numpy as np
280
+ # from os import getenv
281
+ # from backend.query_llm import generate_hf, generate_qwen
282
+ # from backend.semantic_search import table, retriever
283
+ # from huggingface_hub import InferenceClient
284
+
285
+
286
+ # # Bhashini API translation function
287
+ # api_key = getenv('API_KEY')
288
+ # user_id = getenv('USER_ID')
289
+
290
+ # def bhashini_translate(text: str, from_code: str = "en", to_code: str = "hi") -> dict:
291
+ # """Translates text from source language to target language using the Bhashini API."""
292
+
293
+ # if not text.strip():
294
+ # print('Input text is empty. Please provide valid text for translation.')
295
+ # return {"status_code": 400, "message": "Input text is empty", "translated_content": None, "speech_content": None}
296
+ # else:
297
+ # print('Input text - ',text)
298
+ # print(f'Starting translation process from {from_code} to {to_code}...')
299
+ # print(f'Starting translation process from {from_code} to {to_code}...')
300
+ # gr.Warning(f'Translating to {to_code}...')
301
+
302
+ # url = 'https://meity-auth.ulcacontrib.org/ulca/apis/v0/model/getModelsPipeline'
303
+ # headers = {
304
+ # "Content-Type": "application/json",
305
+ # "userID": user_id,
306
+ # "ulcaApiKey": api_key
307
+ # }
308
+ # payload = {
309
+ # "pipelineTasks": [{"taskType": "translation", "config": {"language": {"sourceLanguage": from_code, "targetLanguage": to_code}}}],
310
+ # "pipelineRequestConfig": {"pipelineId": "64392f96daac500b55c543cd"}
311
+ # }
312
+
313
+ # print('Sending initial request to get the pipeline...')
314
+ # response = requests.post(url, json=payload, headers=headers)
315
+
316
+ # if response.status_code != 200:
317
+ # print(f'Error in initial request: {response.status_code}')
318
+ # return {"status_code": response.status_code, "message": "Error in translation request", "translated_content": None}
319
+
320
+ # print('Initial request successful, processing response...')
321
+ # response_data = response.json()
322
+ # service_id = response_data["pipelineResponseConfig"][0]["config"][0]["serviceId"]
323
+ # callback_url = response_data["pipelineInferenceAPIEndPoint"]["callbackUrl"]
324
+
325
+ # print(f'Service ID: {service_id}, Callback URL: {callback_url}')
326
+
327
+ # headers2 = {
328
+ # "Content-Type": "application/json",
329
+ # response_data["pipelineInferenceAPIEndPoint"]["inferenceApiKey"]["name"]: response_data["pipelineInferenceAPIEndPoint"]["inferenceApiKey"]["value"]
330
+ # }
331
+ # compute_payload = {
332
+ # "pipelineTasks": [{"taskType": "translation", "config": {"language": {"sourceLanguage": from_code, "targetLanguage": to_code}, "serviceId": service_id}}],
333
+ # "inputData": {"input": [{"source": text}], "audio": [{"audioContent": None}]}
334
+ # }
335
+
336
+ # print(f'Sending translation request with text: "{text}"')
337
+ # compute_response = requests.post(callback_url, json=compute_payload, headers=headers2)
338
+
339
+ # if compute_response.status_code != 200:
340
+ # print(f'Error in translation request: {compute_response.status_code}')
341
+ # return {"status_code": compute_response.status_code, "message": "Error in translation", "translated_content": None}
342
+
343
+ # print('Translation request successful, processing translation...')
344
+ # compute_response_data = compute_response.json()
345
+ # translated_content = compute_response_data["pipelineResponse"][0]["output"][0]["target"]
346
+
347
+ # print(f'Translation successful. Translated content: "{translated_content}"')
348
+ # return {"status_code": 200, "message": "Translation successful", "translated_content": translated_content}
349
+
350
+
351
+ # # Existing chatbot functions
352
+ # VECTOR_COLUMN_NAME = "vector"
353
+ # TEXT_COLUMN_NAME = "text"
354
+ # HF_TOKEN = getenv("HUGGING_FACE_HUB_TOKEN")
355
+ # proj_dir = Path(__file__).parent
356
+
357
+ # logging.basicConfig(level=logging.INFO)
358
+ # logger = logging.getLogger(__name__)
359
+ # client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1", token=HF_TOKEN)
360
+ # proj_dir = Path(__file__).parent
361
+ # env = Environment(loader=FileSystemLoader(proj_dir / 'templates'))
362
+
363
+ # template = env.get_template('template.j2')
364
+ # template_html = env.get_template('template_html.j2')
365
+
366
+ # # def add_text(history, text):
367
+ # # history = [] if history is None else history
368
+ # # history = history + [(text, None)]
369
+ # # return history, gr.Textbox(value="", interactive=False)
370
+
371
+ # def bot(history, cross_encoder):
372
+
373
+ # top_rerank = 25
374
+ # top_k_rank = 20
375
+ # query = history[-1][0] if history else ''
376
+ # print('\nQuery: ',query )
377
+ # print('\nHistory:',history)
378
+ # if not query:
379
+ # gr.Warning("Please submit a non-empty string as a prompt")
380
+ # raise ValueError("Empty string was submitted")
381
+
382
+ # logger.warning('Retrieving documents...')
383
+
384
+ # if cross_encoder == '(HIGH ACCURATE) ColBERT':
385
+ # gr.Warning('Retrieving using ColBERT.. First time query will take a minute for model to load..pls wait')
386
+ # RAG = RAGPretrainedModel.from_pretrained("colbert-ir/colbertv2.0")
387
+ # RAG_db = RAG.from_index('.ragatouille/colbert/indexes/cbseclass10index')
388
+ # documents_full = RAG_db.search(query, k=top_k_rank)
389
+
390
+ # documents = [item['content'] for item in documents_full]
391
+ # prompt = template.render(documents=documents, query=query)
392
+ # prompt_html = template_html.render(documents=documents, query=query)
393
+
394
+ # generate_fn = generate_hf
395
+
396
+ # history[-1][1] = ""
397
+ # for character in generate_fn(prompt, history[:-1]):
398
+ # history[-1][1] = character
399
+ # yield history, prompt_html
400
+ # else:
401
+ # document_start = perf_counter()
402
+
403
+ # query_vec = retriever.encode(query)
404
+ # doc1 = table.search(query_vec, vector_column_name=VECTOR_COLUMN_NAME).limit(top_k_rank)
405
+
406
+ # documents = table.search(query_vec, vector_column_name=VECTOR_COLUMN_NAME).limit(top_rerank).to_list()
407
+ # documents = [doc[TEXT_COLUMN_NAME] for doc in documents]
408
+
409
+ # query_doc_pair = [[query, doc] for doc in documents]
410
+ # if cross_encoder == '(FAST) MiniLM-L6v2':
411
+ # cross_encoder1 = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2')
412
+ # elif cross_encoder == '(ACCURATE) BGE reranker':
413
+ # cross_encoder1 = CrossEncoder('BAAI/bge-reranker-base')
414
+
415
+ # cross_scores = cross_encoder1.predict(query_doc_pair)
416
+ # sim_scores_argsort = list(reversed(np.argsort(cross_scores)))
417
+
418
+ # documents = [documents[idx] for idx in sim_scores_argsort[:top_k_rank]]
419
+
420
+ # document_time = perf_counter() - document_start
421
+
422
+ # prompt = template.render(documents=documents, query=query)
423
+ # prompt_html = template_html.render(documents=documents, query=query)
424
+
425
+ # #generate_fn = generate_hf
426
+ # generate_fn=generate_qwen
427
+ # # Create a new history entry instead of modifying the tuple directly
428
+ # new_history = history[:-1] + [ (prompt, "") ] # query replaced prompt
429
+ # output=''
430
+ # # for character in generate_fn(prompt, history[:-1]):
431
+ # # #new_history[-1] = (query, character)
432
+ # # output+=character
433
+ # output=generate_fn(prompt, history[:-1])
434
+
435
+ # print('Output:',output)
436
+ # new_history[-1] = (prompt, output) #query replaced with prompt
437
+ # print('New History',new_history)
438
+ # #print('prompt html',prompt_html)# Update the last tuple with new text
439
+
440
+ # history_list = list(history[-1])
441
+ # history_list[1] = output # Assuming `character` is what you want to assign
442
+ # # Update the history with the modified list converted back to a tuple
443
+ # history[-1] = tuple(history_list)
444
+
445
+ # #history[-1][1] = character
446
+ # # yield new_history, prompt_html
447
+ # yield history, prompt_html
448
+ # # new_history,prompt_html
449
+ # # history[-1][1] = ""
450
+ # # for character in generate_fn(prompt, history[:-1]):
451
+ # # history[-1][1] = character
452
+ # # yield history, prompt_html
453
+
454
+ # #def translate_text(response_text, selected_language):
455
+
456
+ # def translate_text(selected_language,history):
457
+
458
+ # iso_language_codes = {
459
+ # "Hindi": "hi",
460
+ # "Gom": "gom",
461
+ # "Kannada": "kn",
462
+ # "Dogri": "doi",
463
+ # "Bodo": "brx",
464
+ # "Urdu": "ur",
465
+ # "Tamil": "ta",
466
+ # "Kashmiri": "ks",
467
+ # "Assamese": "as",
468
+ # "Bengali": "bn",
469
+ # "Marathi": "mr",
470
+ # "Sindhi": "sd",
471
+ # "Maithili": "mai",
472
+ # "Punjabi": "pa",
473
+ # "Malayalam": "ml",
474
+ # "Manipuri": "mni",
475
+ # "Telugu": "te",
476
+ # "Sanskrit": "sa",
477
+ # "Nepali": "ne",
478
+ # "Santali": "sat",
479
+ # "Gujarati": "gu",
480
+ # "Odia": "or"
481
+ # }
482
+
483
+ # to_code = iso_language_codes[selected_language]
484
+ # response_text = history[-1][1] if history else ''
485
+ # print('response_text for translation',response_text)
486
+ # translation = bhashini_translate(response_text, to_code=to_code)
487
+ # return translation['translated_content']
488
+
489
+
490
+ # # Gradio interface
491
+ # with gr.Blocks(theme='gradio/soft') as CHATBOT:
492
+ # history_state = gr.State([])
493
+ # with gr.Row():
494
+ # with gr.Column(scale=10):
495
+ # gr.HTML(value="""<div style="color: #FF4500;"><h1>ADWITIYA-</h1> <h1><span style="color: #008000">Custom Manual Chatbot </span></h1></div>""")
496
+ # gr.HTML(value=f"""<p style="font-family: sans-serif; font-size: 16px;">Using GenAI for CBIC Capacity Building - A free chat bot developed by National Customs Targeting Center using Open source LLMs for CBIC Officers</p>""")
497
+ # gr.HTML(value=f"""<p style="font-family: Arial, sans-serif; font-size: 14px;">Developed by NCTC,Mumbai. Suggestions may be sent to <a href="mailto:[email protected]" style="color: #00008B; font-style: italic;">[email protected]</a>.</p>""")
498
+
499
+ # with gr.Column(scale=3):
500
+ # gr.Image(value='logo.png', height=200, width=200)
501
+
502
+ # chatbot = gr.Chatbot(
503
+ # [],
504
+ # elem_id="chatbot",
505
+ # avatar_images=('https://aui.atlassian.com/aui/8.8/docs/images/avatar-person.svg',
506
+ # 'https://huggingface.co/datasets/huggingface/brand-assets/resolve/main/hf-logo.svg'),
507
+ # bubble_full_width=False,
508
+ # show_copy_button=True,
509
+ # show_share_button=True,
510
+ # )
511
+
512
+ # with gr.Row():
513
+ # txt = gr.Textbox(
514
+ # scale=3,
515
+ # show_label=False,
516
+ # placeholder="Enter text and press enter",
517
+ # container=False,
518
+ # )
519
+ # txt_btn = gr.Button(value="Submit text", scale=1)
520
+
521
+ # cross_encoder = gr.Radio(choices=['(FAST) MiniLM-L6v2', '(ACCURATE) BGE reranker', '(HIGH ACCURATE) ColBERT'], value='(ACCURATE) BGE reranker', label="Embeddings", info="Only First query to Colbert may take little time)")
522
+ # language_dropdown = gr.Dropdown(
523
+ # choices=[
524
+ # "Hindi", "Gom", "Kannada", "Dogri", "Bodo", "Urdu", "Tamil", "Kashmiri", "Assamese", "Bengali", "Marathi",
525
+ # "Sindhi", "Maithili", "Punjabi", "Malayalam", "Manipuri", "Telugu", "Sanskrit", "Nepali", "Santali",
526
+ # "Gujarati", "Odia"
527
+ # ],
528
+ # value="Hindi", # default to Hindi
529
+ # label="Select Language for Translation"
530
+ # )
531
+
532
+ # prompt_html = gr.HTML()
533
+
534
+ # translated_textbox = gr.Textbox(label="Translated Response")
535
+ # def update_history_and_translate(txt, cross_encoder, history_state, language_dropdown):
536
+ # print('History state',history_state)
537
+ # history = history_state
538
+ # history.append((txt, ""))
539
+ # #history_state.value=(history)
540
+
541
+ # # Call bot function
542
+ # # bot_output = list(bot(history, cross_encoder))
543
+ # bot_output = next(bot(history, cross_encoder))
544
+ # print('bot_output',bot_output)
545
+ # #history, prompt_html = bot_output[-1]
546
+ # history, prompt_html = bot_output
547
+ # print('History',history)
548
+ # # Update the history state
549
+ # history_state[:] = history
550
+
551
+ # # Translate text
552
+ # translated_text = translate_text(language_dropdown, history)
553
+ # return history, prompt_html, translated_text
554
+
555
+ # txt_msg = txt_btn.click(update_history_and_translate, [txt, cross_encoder, history_state, language_dropdown], [chatbot, prompt_html, translated_textbox])
556
+ # txt_msg = txt.submit(update_history_and_translate, [txt, cross_encoder, history_state, language_dropdown], [chatbot, prompt_html, translated_textbox])
557
+
558
+ # examples = ['My transhipment cargo is missing','can u explain and tabulate difference between b 17 bond and a warehousing bond',
559
+ # 'What are benefits of the AEO Scheme and eligibility criteria?',
560
+ # 'What are penalties for customs offences? ', 'what are penalties to customs officers misusing their powers under customs act?','What are eligibility criteria for exemption from cost recovery charges','list in detail what is procedure for obtaining new approval for openeing a CFS attached to an ICD']
561
+
562
+ # gr.Examples(examples, txt)
563
+
564
+
565
+ # # Launch the Gradio application
566
+ # CHATBOT.launch(share=True,debug=True)
567