Update app.py
app.py CHANGED
@@ -38,14 +38,6 @@ print(f"ACCOUNT_ID: {ACCOUNT_ID}")
 print(f"CLOUDFLARE_AUTH_TOKEN: {API_TOKEN[:5]}..." if API_TOKEN else "Not set")
 
 
-# Initialize the Hugging Face client
-client = InferenceClient(model="meta-llama/Llama-3.1", token=huggingface_token)
-
-# Assuming the API supports such a call (pseudocode example):
-metadata = client.get_model_metadata()
-print(metadata["max_input_tokens"])  # This would be a hypothetical field
-
-
 MODELS = [
     "mistralai/Mistral-7B-Instruct-v0.3",
     "mistralai/Mixtral-8x7B-Instruct-v0.1",
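Worth noting on the deleted block: get_model_metadata() is not a method that huggingface_hub.InferenceClient provides (the removed comment itself flags it as pseudocode), and "meta-llama/Llama-3.1" is not a complete model repo id. A minimal working setup, assuming a valid token in huggingface_token and using the 8B Instruct repo id as a stand-in, might look like:

from huggingface_hub import InferenceClient

# Sketch only: the repo id below is an assumed stand-in for the incomplete
# "meta-llama/Llama-3.1"; huggingface_token must hold a valid HF access token.
client = InferenceClient(
    model="meta-llama/Meta-Llama-3.1-8B-Instruct",
    token=huggingface_token,
)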
@@ -483,7 +475,7 @@ def get_response_from_excel(query, model, context, num_calls=3, temperature=0.2)
 
     logging.info("Finished generating response for Excel data")
 
-def truncate_context(context, max_chars=
+def truncate_context(context, max_chars=10000):
     """Truncate context to a maximum number of characters."""
     if len(context) <= max_chars:
         return context
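The hunk shows only the head of the new truncate_context helper; a complete version consistent with the visible lines would plausibly read as below (the final return is an assumption, since the function's tail falls outside the hunk):

def truncate_context(context, max_chars=10000):
    """Truncate context to a maximum number of characters."""
    if len(context) <= max_chars:
        return context
    # Assumed tail: fall back to the first max_chars characters.
    return context[:max_chars]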
@@ -549,7 +541,7 @@ def get_response_from_llama(query, model, selected_docs, file_type, num_calls=1,
     # Generate content with streaming enabled
     for response in client.chat_completion(
         messages=messages,  # Pass messages in the required format
-        max_tokens=
+        max_tokens=2048,  # Reduced to ensure we stay within token limits
         temperature=temperature,
         stream=True,
         top_p=0.9,
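For context on the max_tokens change: with stream=True, InferenceClient.chat_completion yields incremental chunks rather than a single response object. A minimal consumption sketch under the same parameters (the model id is taken from MODELS above; the messages content and huggingface_token are placeholders) could be:

from huggingface_hub import InferenceClient

# huggingface_token is assumed to be defined elsewhere, as in app.py.
client = InferenceClient(model="mistralai/Mistral-7B-Instruct-v0.3", token=huggingface_token)
messages = [{"role": "user", "content": "Summarize the uploaded document."}]  # placeholder query

for response in client.chat_completion(
    messages=messages,
    max_tokens=2048,  # matches the value introduced in this commit
    temperature=0.2,
    stream=True,
    top_p=0.9,
):
    # Each streamed chunk carries a delta holding the next piece of generated text.
    delta = response.choices[0].delta.content
    if delta:
        print(delta, end="", flush=True)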