ppsingh committed
Commit 8b3920c · verified · 1 Parent(s): 61c33da

Update app.py

Files changed (1):
  1. app.py +29 -1
app.py CHANGED
@@ -12,7 +12,7 @@ from auditqa.sample_questions import QUESTIONS
 from auditqa.reports import files, report_list, new_files, new_report_list
 from auditqa.process_chunks import load_chunks, getconfig, get_local_qdrant
 from auditqa.retriever import get_context
-from auditqa.reader import nvidia_client, dedicated_endpoint, serverless_api
+from auditqa.reader import nvidia_client, dedicated_endpoint, serverless_api, inf_provider
 from auditqa.utils import make_html_source, parse_output_llm_with_sources, save_logs, get_message_template, get_client_location, get_client_ip, get_platform_info
 from dotenv import load_dotenv
 load_dotenv()
@@ -304,6 +304,34 @@ async def chat(query,history, method, sources,reports,subtype, client_ip=None, s
         async for update in process_stream():
             yield update
 
+    elif model_config.get('reader','TYPE') == 'INF_PROVIDERS':
+        chat_model = inf_provider()
+        start_time = time.time()
+        async def process_stream():
+            nonlocal answer_yet  # Use the outer scope's answer_yet variable
+            # Without nonlocal, Python would create a new local variable answer_yet inside process_stream(),
+            # instead of modifying the one from the outer scope.
+            # Iterate over the streaming response chunks
+            response = chat_model.chat.completions.create(
+                model=model_config.get("reader","INF_PROVIDER_MODEL"),
+                messages = messages,
+                stream= True,
+                max_tokens=int(model_config.get('reader','MAX_TOKENS')),
+            )
+            for message in response:
+                token = message.choices[0].delta.content
+                if token:
+                    answer_yet += token
+                    parsed_answer = parse_output_llm_with_sources(answer_yet)
+                    history[-1] = (query, parsed_answer)
+                    logs_data["answer"] = parsed_answer
+                    yield [tuple(x) for x in history], docs_html, logs_data, session_id
+
+        # Stream the response updates
+        async for update in process_stream():
+            yield update
+
+
     elif model_config.get('reader','TYPE') == 'DEDICATED':
         chat_model = dedicated_endpoint()
         ### adding for assessing computation time
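
Note: `inf_provider()` lives in `auditqa/reader.py`, which this commit does not show. Below is a minimal sketch of what it might return, assuming it wraps `huggingface_hub.InferenceClient`, whose OpenAI-compatible `chat.completions.create(..., stream=True)` interface matches the call in the new branch; the provider config key, config file path, and token environment variable are assumptions, not taken from the repo.

    # Hypothetical sketch of inf_provider() in auditqa/reader.py.
    # Assumes huggingface_hub's InferenceClient, which exposes the
    # chat.completions.create(..., stream=True) interface used above.
    import os
    from huggingface_hub import InferenceClient
    from auditqa.process_chunks import getconfig

    model_config = getconfig("model_params.cfg")  # config path is an assumption

    def inf_provider():
        """Return a chat client routed through a HF inference provider."""
        return InferenceClient(
            provider=model_config.get("reader", "INF_PROVIDER"),  # key name assumed
            api_key=os.environ["HF_TOKEN"],                       # env var assumed
        )

With `stream=True`, `create()` returns an iterator of chunks whose `choices[0].delta.content` carries each token delta, which is exactly how the `for message in response:` loop in the diff accumulates `answer_yet`.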
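The new branch is selected and parameterized entirely through `model_config`. A hypothetical `model_params.cfg` fragment that would exercise it; the `TYPE`, `INF_PROVIDER_MODEL`, and `MAX_TOKENS` keys come from the diff, while the model name and token budget are illustrative values:

    [reader]
    TYPE = INF_PROVIDERS
    INF_PROVIDER_MODEL = meta-llama/Meta-Llama-3-8B-Instruct
    MAX_TOKENS = 512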