Update app.py
app.py CHANGED
@@ -38,14 +38,6 @@ print(f"ACCOUNT_ID: {ACCOUNT_ID}")
 print(f"CLOUDFLARE_AUTH_TOKEN: {API_TOKEN[:5]}..." if API_TOKEN else "Not set")
 
 
-# Initialize the Hugging Face client
-client = InferenceClient(model="meta-llama/Llama-3.1", token=huggingface_token)
-
-# Assuming the API supports such a call (pseudocode example):
-metadata = client.get_model_metadata()
-print(metadata["max_input_tokens"])  # This would be a hypothetical field
-
-
 MODELS = [
     "mistralai/Mistral-7B-Instruct-v0.3",
     "mistralai/Mixtral-8x7B-Instruct-v0.1",
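Worth noting on the deleted block: get_model_metadata() is not a method that huggingface_hub.InferenceClient provides (the removed comment itself flags it as pseudocode), and "meta-llama/Llama-3.1" is not a complete model repo id. A minimal working setup, assuming a valid token in huggingface_token and using the 8B Instruct repo id as a stand-in, might look like:

from huggingface_hub import InferenceClient

# Sketch only: the repo id below is an assumed stand-in for the incomplete
# "meta-llama/Llama-3.1"; huggingface_token must hold a valid HF access token.
client = InferenceClient(
    model="meta-llama/Meta-Llama-3.1-8B-Instruct",
    token=huggingface_token,
)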
@@ -483,7 +475,7 @@ def get_response_from_excel(query, model, context, num_calls=3, temperature=0.2)
 
     logging.info("Finished generating response for Excel data")
 
-def truncate_context(context, max_chars=
+def truncate_context(context, max_chars=10000):
     """Truncate context to a maximum number of characters."""
     if len(context) <= max_chars:
         return context
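The hunk shows only the head of the new truncate_context helper; a complete version consistent with the visible lines would plausibly read as below (the final return is an assumption, since the function's tail falls outside the hunk):

def truncate_context(context, max_chars=10000):
    """Truncate context to a maximum number of characters."""
    if len(context) <= max_chars:
        return context
    # Assumed tail: fall back to the first max_chars characters.
    return context[:max_chars]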
@@ -549,7 +541,7 @@ def get_response_from_llama(query, model, selected_docs, file_type, num_calls=1,
     # Generate content with streaming enabled
     for response in client.chat_completion(
         messages=messages,  # Pass messages in the required format
-        max_tokens=
+        max_tokens=2048,  # Reduced to ensure we stay within token limits
         temperature=temperature,
         stream=True,
         top_p=0.9,
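For context on the max_tokens change: with stream=True, InferenceClient.chat_completion yields incremental chunks rather than a single response object. A minimal consumption sketch under the same parameters (the model id is taken from MODELS above; the messages content and huggingface_token are placeholders) could be:

from huggingface_hub import InferenceClient

# huggingface_token is assumed to be defined elsewhere, as in app.py.
client = InferenceClient(model="mistralai/Mistral-7B-Instruct-v0.3", token=huggingface_token)
messages = [{"role": "user", "content": "Summarize the uploaded document."}]  # placeholder query

for response in client.chat_completion(
    messages=messages,
    max_tokens=2048,  # matches the value introduced in this commit
    temperature=0.2,
    stream=True,
    top_p=0.9,
):
    # Each streamed chunk carries a delta holding the next piece of generated text.
    delta = response.choices[0].delta.content
    if delta:
        print(delta, end="", flush=True)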