Shreyas094 committed
Commit 5ed09d4 · verified · 1 Parent(s): a7533b2

Update app.py

Files changed (1): app.py +2 -10
app.py CHANGED
@@ -38,14 +38,6 @@ print(f"ACCOUNT_ID: {ACCOUNT_ID}")
 print(f"CLOUDFLARE_AUTH_TOKEN: {API_TOKEN[:5]}..." if API_TOKEN else "Not set")
 
 
-# Initialize the Hugging Face client
-client = InferenceClient(model="meta-llama/Llama-3.1", token=huggingface_token)
-
-# Assuming the API supports such a call (pseudocode example):
-metadata = client.get_model_metadata()
-print(metadata["max_input_tokens"])  # This would be a hypothetical field
-
-
 MODELS = [
     "mistralai/Mistral-7B-Instruct-v0.3",
     "mistralai/Mixtral-8x7B-Instruct-v0.1",
@@ -483,7 +475,7 @@ def get_response_from_excel(query, model, context, num_calls=3, temperature=0.2)
 
     logging.info("Finished generating response for Excel data")
 
-def truncate_context(context, max_chars=16000):
+def truncate_context(context, max_chars=10000):
     """Truncate context to a maximum number of characters."""
     if len(context) <= max_chars:
         return context
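
This hunk only lowers the default cap from 16,000 to 10,000 characters; the truncation branch itself sits below the visible context lines. A sketch of the whole function under the simplest assumption, a plain character slice (the actual tail of the original is not shown in the diff):

def truncate_context(context, max_chars=10000):
    """Truncate context to a maximum number of characters."""
    if len(context) <= max_chars:
        return context
    # Assumed tail: the diff context ends above, so this slice is a guess
    # at the original behavior rather than a quote of it.
    return context[:max_chars]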
@@ -549,7 +541,7 @@ def get_response_from_llama(query, model, selected_docs, file_type, num_calls=1,
         # Generate content with streaming enabled
         for response in client.chat_completion(
             messages=messages,  # Pass messages in the required format
-            max_tokens=3000,  # Reduced to ensure we stay within token limits
+            max_tokens=2048,  # Reduced to ensure we stay within token limits
             temperature=temperature,
             stream=True,
             top_p=0.9,
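
The only change here is the completion cap, 3000 down to 2048 tokens; together with the smaller context cap above, this keeps prompt plus completion inside the model's window. A self-contained sketch of the surrounding streaming loop, with a placeholder model and prompt (the real get_response_from_llama builds messages from the selected documents):

from huggingface_hub import InferenceClient

# Placeholder model and prompt; the app picks these from MODELS and user input.
client = InferenceClient("mistralai/Mistral-7B-Instruct-v0.3")
messages = [{"role": "user", "content": "Summarize the uploaded document."}]

for response in client.chat_completion(
    messages=messages,
    max_tokens=2048,      # the new cap introduced by this commit
    temperature=0.2,
    stream=True,
    top_p=0.9,
):
    # Each streamed chunk carries an incremental delta; content can be None
    # on the final chunk, hence the empty-string fallback.
    print(response.choices[0].delta.content or "", end="")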
 