Spaces: Running on T4
Update app.py
app.py CHANGED
@@ -12,7 +12,7 @@ from auditqa.sample_questions import QUESTIONS
 from auditqa.reports import files, report_list, new_files, new_report_list
 from auditqa.process_chunks import load_chunks, getconfig, get_local_qdrant
 from auditqa.retriever import get_context
-from auditqa.reader import nvidia_client, dedicated_endpoint, serverless_api
+from auditqa.reader import nvidia_client, dedicated_endpoint, serverless_api, inf_provider
 from auditqa.utils import make_html_source, parse_output_llm_with_sources, save_logs, get_message_template, get_client_location, get_client_ip, get_platform_info
 from dotenv import load_dotenv
 load_dotenv()
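The commit imports inf_provider() but its definition in auditqa/reader.py is not part of this diff. As a rough sketch only: assuming the helper wraps huggingface_hub's InferenceClient, whose OpenAI-compatible chat.completions.create() matches the call made in the new branch below, it might look like this. The INF_PROVIDER config key, the config filename, and the HF_TOKEN variable are guesses, not shown anywhere in the diff:

import os
from huggingface_hub import InferenceClient
from auditqa.process_chunks import getconfig

model_config = getconfig("model_params.cfg")  # config filename is an assumption

def inf_provider():
    # Hypothetical sketch, not the repo's actual implementation:
    # InferenceClient exposes an OpenAI-style chat.completions.create(),
    # which is the interface the INF_PROVIDERS branch calls with stream=True.
    return InferenceClient(
        provider=model_config.get("reader", "INF_PROVIDER"),  # e.g. "together"; key name is a guess
        token=os.getenv("HF_TOKEN"),
    )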
@@ -304,6 +304,34 @@ async def chat(query,history, method, sources,reports,subtype, client_ip=None, s
         async for update in process_stream():
             yield update
 
+    elif model_config.get('reader','TYPE') == 'INF_PROVIDERS':
+        chat_model = inf_provider()
+        start_time = time.time()
+        async def process_stream():
+            nonlocal answer_yet  # Use the outer scope's answer_yet variable
+            # Without nonlocal, Python would create a new local variable answer_yet inside process_stream(),
+            # instead of modifying the one from the outer scope.
+            # Iterate over the streaming response chunks
+            response = chat_model.chat.completions.create(
+                model=model_config.get("reader", "INF_PROVIDER_MODEL"),
+                messages=messages,
+                stream=True,
+                max_tokens=int(model_config.get('reader', 'MAX_TOKENS')),
+            )
+            for message in response:
+                token = message.choices[0].delta.content
+                if token:
+                    answer_yet += token
+                    parsed_answer = parse_output_llm_with_sources(answer_yet)
+                    history[-1] = (query, parsed_answer)
+                    logs_data["answer"] = parsed_answer
+                    yield [tuple(x) for x in history], docs_html, logs_data, session_id
+
+        # Stream the response updates
+        async for update in process_stream():
+            yield update
+
+
     elif model_config.get('reader','TYPE') == 'DEDICATED':
         chat_model = dedicated_endpoint()
         ### adding for assessing computation time
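Since the new branch is selected via model_config.get('reader','TYPE'), enabling it should only require a config change. Assuming getconfig wraps Python's configparser (the ('section', 'key') access pattern in the diff suggests it), the reader section might look like the following; only the key names TYPE, INF_PROVIDER_MODEL, and MAX_TOKENS come from the diff, and the values are illustrative:

[reader]
TYPE = INF_PROVIDERS
INF_PROVIDER_MODEL = meta-llama/Llama-3.1-8B-Instruct
MAX_TOKENS = 512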