Add language selection for the search & index features
Files changed:
- ChuanhuChatbot.py +10 -0
- chat_func.py +10 -2
- llama_func.py +5 -2
- presets.py +11 -3
ChuanhuChatbot.py (CHANGED)

@@ -170,6 +170,12 @@ with gr.Blocks(
                     label="实时传输回答", value=True, visible=enable_streaming_option
                 )
                 use_websearch_checkbox = gr.Checkbox(label="使用在线搜索", value=False)
+                language_select_dropdown = gr.Dropdown(
+                    label="选择回复语言(针对搜索&索引功能)",
+                    choices=REPLY_LANGUAGES,
+                    multiselect=False,
+                    value=REPLY_LANGUAGES[0]
+                )
                 index_files = gr.Files(label="上传索引文件", type="file", multiple=True)

             with gr.Tab(label="Prompt"):
@@ -293,6 +299,7 @@ with gr.Blocks(
             model_select_dropdown,
             use_websearch_checkbox,
             index_files,
+            language_select_dropdown,
         ],
         [chatbot, history, status_display, token_count],
         show_progress=True,
@@ -315,6 +322,7 @@ with gr.Blocks(
             model_select_dropdown,
             use_websearch_checkbox,
             index_files,
+            language_select_dropdown,
         ],
         [chatbot, history, status_display, token_count],
         show_progress=True,
@@ -339,6 +347,7 @@ with gr.Blocks(
             temperature,
             use_streaming_checkbox,
             model_select_dropdown,
+            language_select_dropdown,
         ],
         [chatbot, history, status_display, token_count],
         show_progress=True,
@@ -363,6 +372,7 @@ with gr.Blocks(
             temperature,
             gr.State(0),
             model_select_dropdown,
+            language_select_dropdown,
         ],
         [chatbot, history, status_display, token_count],
         show_progress=True,
chat_func.py (CHANGED)

@@ -262,9 +262,12 @@ def predict(
     selected_model=MODELS[0],
     use_websearch=False,
     files = None,
+    reply_language="中文",
     should_check_token_count=True,
 ):  # repetition_penalty, top_k
     logging.info("输入为:" + colorama.Fore.BLUE + f"{inputs}" + colorama.Style.RESET_ALL)
+    if reply_language == "跟随问题语言(不稳定)":
+        reply_language = "the same language as the question, such as English, 中文, 日本語, Español, Français, or Deutsch."
     if files:
         msg = "构建索引中……(这可能需要比较久的时间)"
         logging.info(msg)
@@ -272,7 +275,7 @@ def predict(
         index = construct_index(openai_api_key, file_src=files)
         msg = "索引构建完成,获取回答中……"
         yield chatbot, history, msg, all_token_counts
-        history, chatbot, status_text = chat_ai(openai_api_key, index, inputs, history, chatbot)
+        history, chatbot, status_text = chat_ai(openai_api_key, index, inputs, history, chatbot, reply_language)
         yield chatbot, history, status_text, all_token_counts
         return

@@ -292,6 +295,7 @@ def predict(
             replace_today(WEBSEARCH_PTOMPT_TEMPLATE)
             .replace("{query}", inputs)
             .replace("{web_results}", "\n\n".join(web_results))
+            .replace("{reply_language}", reply_language )
         )
     else:
         link_references = ""
@@ -389,6 +393,7 @@ def retry(
     temperature,
     stream=False,
     selected_model=MODELS[0],
+    reply_language="中文",
 ):
     logging.info("重试中……")
     if len(history) == 0:
@@ -408,6 +413,7 @@ def retry(
         temperature,
         stream=stream,
         selected_model=selected_model,
+        reply_language=reply_language,
     )
     logging.info("重试中……")
     for x in iter:
@@ -425,6 +431,7 @@ def reduce_token_size(
     temperature,
     max_token_count,
     selected_model=MODELS[0],
+    reply_language="中文",
 ):
     logging.info("开始减少token数量……")
     iter = predict(
@@ -438,6 +445,7 @@ def reduce_token_size(
         temperature,
         selected_model=selected_model,
         should_check_token_count=False,
+        reply_language=reply_language,
     )
     logging.info(f"chatbot: {chatbot}")
     flag = False
@@ -453,4 +461,4 @@ def reduce_token_size(
         sum(token_count) if len(token_count) > 0 else 0,
     ), token_count
     logging.info(msg)
-    logging.info("减少token数量完毕")
+    logging.info("减少token数量完毕")
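
Two details in predict() are easy to miss: the sentinel choice 跟随问题语言(不稳定) is rewritten into a free-form English instruction before any template sees it, and the substitution uses str.replace rather than str.format so the template's other literal braces survive. A self-contained sketch of just that logic (the template is abbreviated, and build_websearch_prompt is a hypothetical helper, not a function in chat_func.py):

    # Abbreviated stand-in for WEBSEARCH_PTOMPT_TEMPLATE in presets.py.
    WEBSEARCH_PTOMPT_TEMPLATE = """\
    Query: {query}
    Reply in {reply_language}
    """

    def build_websearch_prompt(query: str, reply_language: str = "中文") -> str:
        # The "follow the question's language" choice is a sentinel: it maps to a
        # descriptive instruction instead of a concrete language name.
        if reply_language == "跟随问题语言(不稳定)":
            reply_language = ("the same language as the question, such as English, "
                              "中文, 日本語, Español, Français, or Deutsch.")
        # str.replace, not str.format: the full template also contains other
        # placeholders (e.g. {web_results}) that are filled in separately.
        return (WEBSEARCH_PTOMPT_TEMPLATE
                .replace("{query}", query)
                .replace("{reply_language}", reply_language))

    print(build_websearch_prompt("什么是注意力机制?", "跟随问题语言(不稳定)"))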
llama_func.py (CHANGED)

@@ -102,6 +102,7 @@ def chat_ai(
     question,
     context,
     chatbot,
+    reply_language,
 ):
     os.environ["OPENAI_API_KEY"] = api_key

@@ -116,6 +117,7 @@ def chat_ai(
         SIM_K,
         INDEX_QUERY_TEMPRATURE,
         context,
+        reply_language,
     )
     if response is None:
         status_text = "查询失败,请换个问法试试"
@@ -139,6 +141,7 @@ def ask_ai(
     sim_k=1,
     temprature=0,
     prefix_messages=[],
+    reply_language="中文",
 ):
     os.environ["OPENAI_API_KEY"] = api_key

@@ -153,8 +156,8 @@ def ask_ai(
     )

     response = None  # Initialize response variable to avoid UnboundLocalError
-    qa_prompt = QuestionAnswerPrompt(prompt_tmpl)
-    rf_prompt = RefinePrompt(refine_tmpl)
+    qa_prompt = QuestionAnswerPrompt(prompt_tmpl.replace("{reply_language}", reply_language))
+    rf_prompt = RefinePrompt(refine_tmpl.replace("{reply_language}", reply_language))
    response = index.query(
         question,
         llm_predictor=llm_predictor,
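
The prompt objects here are built from templates that still contain a {reply_language} slot, so the slot is resolved with str.replace before llama_index ever parses the template; the library then only sees the placeholders it owns ({context_str}, {query_str}). A dependency-free sketch of that order of operations, with PromptStub as a hypothetical stand-in for QuestionAnswerPrompt/RefinePrompt:

    PROMPT_TEMPLATE = """\
    Context information is below.
    {context_str}
    Answer the question: {query_str}
    Reply in {reply_language}
    """

    class PromptStub:
        """Hypothetical stand-in for llama_index's prompt classes."""
        def __init__(self, template: str):
            self.template = template

    reply_language = "English"
    # Resolve the app-level slot first, then hand the template to the library.
    qa_prompt = PromptStub(PROMPT_TEMPLATE.replace("{reply_language}", reply_language))

    assert "{reply_language}" not in qa_prompt.template  # resolved by the app
    assert "{query_str}" in qa_prompt.template           # left for llama_index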
presets.py (CHANGED)

@@ -49,6 +49,13 @@ MODELS = [
     "gpt-4-32k-0314",
 ] # 可选的模型

+REPLY_LANGUAGES = [
+    "中文",
+    "English",
+    "日本語",
+    "跟随问题语言(不稳定)"
+]
+

 WEBSEARCH_PTOMPT_TEMPLATE = """\
 Web search results:
@@ -58,7 +65,8 @@ Current date: {current_date}

 Instructions: Using the provided web search results, write a comprehensive reply to the given query. Make sure to cite results using [[number](URL)] notation after the reference. If the provided search results refer to multiple subjects with the same name, write separate answers for each subject.
 Query: {query}
-Reply in 中文"""
+Reply in {reply_language}
+"""

 PROMPT_TEMPLATE = """\
 Context information is below.
@@ -71,7 +79,7 @@ Make sure to cite results using [number] notation after the reference.
 If the provided context information refer to multiple subjects with the same name, write separate answers for each subject.
 Use prior knowledge only if the given context didn't provide enough information.
 Answer the question: {query_str}
-Reply in 中文
+Reply in {reply_language}
 """

 REFINE_TEMPLATE = """\
@@ -83,6 +91,6 @@ We have the opportunity to refine the existing answer
 {context_msg}
 ------------
 Given the new context, refine the original answer to better
-Answer in the same language as the question, such as English, 中文, 日本語, Español, Français, or Deutsch.
+Reply in {reply_language}
 If the context isn't useful, return the original answer.
 """
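
All three templates now end with the same Reply in {reply_language} line, so whether a request goes through web search, a fresh index query, or an answer refinement, the language instruction lands in the final prompt. A small sketch that checks this invariant over abbreviated copies of the templates (the full template text is shortened here to the relevant lines):

    REPLY_LANGUAGES = ["中文", "English", "日本語", "跟随问题语言(不稳定)"]

    # Abbreviated copies of the three templates above.
    TEMPLATES = {
        "WEBSEARCH_PTOMPT_TEMPLATE": "Query: {query}\nReply in {reply_language}\n",
        "PROMPT_TEMPLATE": "Answer the question: {query_str}\nReply in {reply_language}\n",
        "REFINE_TEMPLATE": "Reply in {reply_language}\nIf the context isn't useful, return the original answer.\n",
    }

    for name, tmpl in TEMPLATES.items():
        # Every template must carry the slot, or the UI choice is silently ignored.
        assert "{reply_language}" in tmpl, f"{name} is missing the placeholder"
        print(name, "->", tmpl.replace("{reply_language}", REPLY_LANGUAGES[1]).strip().splitlines()[-1])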