Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -217,14 +217,14 @@ def get_model(temperature, top_p, repetition_penalty):
|
|
217 |
"temperature": temperature,
|
218 |
"top_p": top_p,
|
219 |
"repetition_penalty": repetition_penalty,
|
220 |
-
"max_length":
|
221 |
},
|
222 |
huggingfacehub_api_token=huggingface_token
|
223 |
)
|
224 |
|
225 |
-
MAX_PROMPT_CHARS =
|
226 |
|
227 |
-
def chunk_text(text: str, max_chunk_size: int =
|
228 |
chunks = []
|
229 |
current_chunk = ""
|
230 |
for sentence in re.split(r'(?<=[.!?])\s+', text):
|
@@ -244,7 +244,7 @@ def get_most_relevant_chunks(question: str, chunks: List[str], top_k: int = 3) -
|
|
244 |
top_indices = np.argsort(similarities)[-top_k:]
|
245 |
return [chunks[i] for i in top_indices]
|
246 |
|
247 |
-
def generate_chunked_response(model, prompt, max_tokens=
|
248 |
full_response = ""
|
249 |
for i in range(max_chunks):
|
250 |
try:
|
@@ -395,8 +395,8 @@ def ask_question(question, temperature, top_p, repetition_penalty, web_search, c
|
|
395 |
database = None
|
396 |
|
397 |
max_attempts = 3
|
398 |
-
max_input_tokens =
|
399 |
-
max_output_tokens =
|
400 |
|
401 |
if web_search:
|
402 |
contextualized_question, topics, entity_tracker, _ = chatbot.process_question(question)
|
|
|
217 |
"temperature": temperature,
|
218 |
"top_p": top_p,
|
219 |
"repetition_penalty": repetition_penalty,
|
220 |
+
"max_length": 800
|
221 |
},
|
222 |
huggingfacehub_api_token=huggingface_token
|
223 |
)
|
224 |
|
225 |
+
MAX_PROMPT_CHARS = 20000 # Adjust based on your model's limitations
|
226 |
|
227 |
+
def chunk_text(text: str, max_chunk_size: int = 800) -> List[str]:
|
228 |
chunks = []
|
229 |
current_chunk = ""
|
230 |
for sentence in re.split(r'(?<=[.!?])\s+', text):
|
|
|
244 |
top_indices = np.argsort(similarities)[-top_k:]
|
245 |
return [chunks[i] for i in top_indices]
|
246 |
|
247 |
+
def generate_chunked_response(model, prompt, max_tokens=800, max_chunks=5):
|
248 |
full_response = ""
|
249 |
for i in range(max_chunks):
|
250 |
try:
|
|
|
395 |
database = None
|
396 |
|
397 |
max_attempts = 3
|
398 |
+
max_input_tokens = 20000 # Leave room for the model's response
|
399 |
+
max_output_tokens = 800
|
400 |
|
401 |
if web_search:
|
402 |
contextualized_question, topics, entity_tracker, _ = chatbot.process_question(question)
|