Spaces:
Build error
Update app.py
app.py CHANGED
@@ -38,14 +38,6 @@ print(f"ACCOUNT_ID: {ACCOUNT_ID}")
 print(f"CLOUDFLARE_AUTH_TOKEN: {API_TOKEN[:5]}..." if API_TOKEN else "Not set")
 
 
-# Initialize the Hugging Face client
-client = InferenceClient(model="meta-llama/Llama-3.1", token=huggingface_token)
-
-# Assuming the API supports such a call (pseudocode example):
-metadata = client.get_model_metadata()
-print(metadata["max_input_tokens"])  # This would be a hypothetical field
-
-
 MODELS = [
     "mistralai/Mistral-7B-Instruct-v0.3",
     "mistralai/Mixtral-8x7B-Instruct-v0.1",
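The block removed above is pseudocode by its own admission: huggingface_hub's InferenceClient has no get_model_metadata() method, and "max_input_tokens" is a hypothetical field. A minimal sketch of one real way to look up a model's context window, by reading its config.json from the Hub (the max_position_embeddings field is an assumption that holds for most Llama/Mistral-style configs):

import json
from huggingface_hub import hf_hub_download

def get_context_window(repo_id: str, token: str | None = None) -> int | None:
    # Download the model's config.json and read its declared context length.
    config_path = hf_hub_download(repo_id=repo_id, filename="config.json", token=token)
    with open(config_path) as f:
        config = json.load(f)
    return config.get("max_position_embeddings")

# Example (public repo, no token needed):
# print(get_context_window("mistralai/Mistral-7B-Instruct-v0.3"))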
@@ -483,7 +475,7 @@ def get_response_from_excel(query, model, context, num_calls=3, temperature=0.2)
 
     logging.info("Finished generating response for Excel data")
 
-def truncate_context(context, max_chars=
+def truncate_context(context, max_chars=10000):
     """Truncate context to a maximum number of characters."""
     if len(context) <= max_chars:
         return context
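The hunk above cuts off before the truncation branch of the helper. A minimal sketch of the complete function under the new 10,000-character default; the final slicing line is an assumption about the part not shown in the diff:

def truncate_context(context, max_chars=10000):
    """Truncate context to a maximum number of characters."""
    if len(context) <= max_chars:
        return context
    # Assumed behavior of the elided branch: hard cut at the character limit.
    return context[:max_chars]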
@@ -549,7 +541,7 @@ def get_response_from_llama(query, model, selected_docs, file_type, num_calls=1,
     # Generate content with streaming enabled
     for response in client.chat_completion(
         messages=messages,  # Pass messages in the required format
-        max_tokens=
+        max_tokens=2048,  # Reduced to ensure we stay within token limits
         temperature=temperature,
         stream=True,
         top_p=0.9,
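The max_tokens argument now caps each completion at 2048 tokens, per the diff's own comment about staying within token limits. A hedged sketch of how a streaming chat_completion loop like the one above is typically consumed with huggingface_hub's InferenceClient; names not shown in the diff (stream_chat, the example model id and prompt) are placeholders:

from huggingface_hub import InferenceClient

def stream_chat(client: InferenceClient, messages, temperature=0.2):
    response_text = ""
    for chunk in client.chat_completion(
        messages=messages,   # Pass messages in the required format
        max_tokens=2048,     # Reduced to ensure we stay within token limits
        temperature=temperature,
        stream=True,
        top_p=0.9,
    ):
        # Each streamed chunk carries an incremental delta of the reply.
        response_text += chunk.choices[0].delta.content or ""
    return response_text

# Example usage (assumed values):
# client = InferenceClient(model="mistralai/Mistral-7B-Instruct-v0.3", token=huggingface_token)
# print(stream_chat(client, [{"role": "user", "content": "Summarize the uploaded Excel sheet."}]))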