Add language selection for the search & index features
Files changed:
- ChuanhuChatbot.py +10 -0
- chat_func.py +10 -2
- llama_func.py +5 -2
- presets.py +11 -3
ChuanhuChatbot.py (CHANGED)

@@ -170,6 +170,12 @@ with gr.Blocks(
                     label="实时传输回答", value=True, visible=enable_streaming_option
                 )
                 use_websearch_checkbox = gr.Checkbox(label="使用在线搜索", value=False)
+                language_select_dropdown = gr.Dropdown(
+                    label="选择回复语言(针对搜索&索引功能)",
+                    choices=REPLY_LANGUAGES,
+                    multiselect=False,
+                    value=REPLY_LANGUAGES[0]
+                )
                 index_files = gr.Files(label="上传索引文件", type="file", multiple=True)

             with gr.Tab(label="Prompt"):
@@ -293,6 +299,7 @@ with gr.Blocks(
             model_select_dropdown,
             use_websearch_checkbox,
             index_files,
+            language_select_dropdown,
         ],
         [chatbot, history, status_display, token_count],
         show_progress=True,
@@ -315,6 +322,7 @@ with gr.Blocks(
             model_select_dropdown,
             use_websearch_checkbox,
             index_files,
+            language_select_dropdown,
         ],
         [chatbot, history, status_display, token_count],
         show_progress=True,
@@ -339,6 +347,7 @@ with gr.Blocks(
             temperature,
             use_streaming_checkbox,
             model_select_dropdown,
+            language_select_dropdown,
         ],
         [chatbot, history, status_display, token_count],
         show_progress=True,
@@ -363,6 +372,7 @@ with gr.Blocks(
             temperature,
             gr.State(0),
             model_select_dropdown,
+            language_select_dropdown,
         ],
         [chatbot, history, status_display, token_count],
         show_progress=True,
chat_func.py (CHANGED)

@@ -262,9 +262,12 @@ def predict(
     selected_model=MODELS[0],
     use_websearch=False,
     files = None,
+    reply_language="中文",
     should_check_token_count=True,
 ):  # repetition_penalty, top_k
     logging.info("输入为:" + colorama.Fore.BLUE + f"{inputs}" + colorama.Style.RESET_ALL)
+    if reply_language == "跟随问题语言(不稳定)":
+        reply_language = "the same language as the question, such as English, 中文, 日本語, Español, Français, or Deutsch."
     if files:
         msg = "构建索引中……(这可能需要比较久的时间)"
         logging.info(msg)
@@ -272,7 +275,7 @@ def predict(
         index = construct_index(openai_api_key, file_src=files)
         msg = "索引构建完成,获取回答中……"
         yield chatbot, history, msg, all_token_counts
-        history, chatbot, status_text = chat_ai(openai_api_key, index, inputs, history, chatbot)
+        history, chatbot, status_text = chat_ai(openai_api_key, index, inputs, history, chatbot, reply_language)
         yield chatbot, history, status_text, all_token_counts
         return

@@ -292,6 +295,7 @@ def predict(
             replace_today(WEBSEARCH_PTOMPT_TEMPLATE)
             .replace("{query}", inputs)
             .replace("{web_results}", "\n\n".join(web_results))
+            .replace("{reply_language}", reply_language )
         )
     else:
         link_references = ""
@@ -389,6 +393,7 @@ def retry(
     temperature,
     stream=False,
     selected_model=MODELS[0],
+    reply_language="中文",
 ):
     logging.info("重试中……")
     if len(history) == 0:
@@ -408,6 +413,7 @@ def retry(
         temperature,
         stream=stream,
         selected_model=selected_model,
+        reply_language=reply_language,
     )
     logging.info("重试中……")
     for x in iter:
@@ -425,6 +431,7 @@ def reduce_token_size(
     temperature,
     max_token_count,
     selected_model=MODELS[0],
+    reply_language="中文",
 ):
     logging.info("开始减少token数量……")
     iter = predict(
@@ -438,6 +445,7 @@ def reduce_token_size(
         temperature,
         selected_model=selected_model,
         should_check_token_count=False,
+        reply_language=reply_language,
     )
     logging.info(f"chatbot: {chatbot}")
     flag = False
@@ -453,4 +461,4 @@ def reduce_token_size(
         sum(token_count) if len(token_count) > 0 else 0,
     ), token_count
     logging.info(msg)
-    logging.info("减少token数量完毕")
+    logging.info("减少token数量完毕")
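
Two details in predict() are easy to miss: the sentinel choice 跟随问题语言(不稳定) is rewritten into a free-form English instruction before any template sees it, and the substitution uses str.replace rather than str.format so the template's other literal braces survive. A self-contained sketch of just that logic (the template is abbreviated, and build_websearch_prompt is a hypothetical helper, not a function in chat_func.py):

    # Abbreviated stand-in for WEBSEARCH_PTOMPT_TEMPLATE in presets.py.
    WEBSEARCH_PTOMPT_TEMPLATE = """\
    Query: {query}
    Reply in {reply_language}
    """

    def build_websearch_prompt(query: str, reply_language: str = "中文") -> str:
        # The "follow the question's language" choice is a sentinel: it maps to a
        # descriptive instruction instead of a concrete language name.
        if reply_language == "跟随问题语言(不稳定)":
            reply_language = ("the same language as the question, such as English, "
                              "中文, 日本語, Español, Français, or Deutsch.")
        # str.replace, not str.format: the full template also contains other
        # placeholders (e.g. {web_results}) that are filled in separately.
        return (WEBSEARCH_PTOMPT_TEMPLATE
                .replace("{query}", query)
                .replace("{reply_language}", reply_language))

    print(build_websearch_prompt("什么是注意力机制?", "跟随问题语言(不稳定)"))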
llama_func.py (CHANGED)

@@ -102,6 +102,7 @@ def chat_ai(
     question,
     context,
     chatbot,
+    reply_language,
 ):
     os.environ["OPENAI_API_KEY"] = api_key

@@ -116,6 +117,7 @@ def chat_ai(
         SIM_K,
         INDEX_QUERY_TEMPRATURE,
         context,
+        reply_language,
     )
     if response is None:
         status_text = "查询失败,请换个问法试试"
@@ -139,6 +141,7 @@ def ask_ai(
     sim_k=1,
     temprature=0,
     prefix_messages=[],
+    reply_language="中文",
 ):
     os.environ["OPENAI_API_KEY"] = api_key

@@ -153,8 +156,8 @@ def ask_ai(
     )

     response = None  # Initialize response variable to avoid UnboundLocalError
-    qa_prompt = QuestionAnswerPrompt(prompt_tmpl)
-    rf_prompt = RefinePrompt(refine_tmpl)
+    qa_prompt = QuestionAnswerPrompt(prompt_tmpl.replace("{reply_language}", reply_language))
+    rf_prompt = RefinePrompt(refine_tmpl.replace("{reply_language}", reply_language))
    response = index.query(
         question,
         llm_predictor=llm_predictor,
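
The prompt objects here are built from templates that still contain a {reply_language} slot, so the slot is resolved with str.replace before llama_index ever parses the template; the library then only sees the placeholders it owns ({context_str}, {query_str}). A dependency-free sketch of that order of operations, with PromptStub as a hypothetical stand-in for QuestionAnswerPrompt/RefinePrompt:

    PROMPT_TEMPLATE = """\
    Context information is below.
    {context_str}
    Answer the question: {query_str}
    Reply in {reply_language}
    """

    class PromptStub:
        """Hypothetical stand-in for llama_index's prompt classes."""
        def __init__(self, template: str):
            self.template = template

    reply_language = "English"
    # Resolve the app-level slot first, then hand the template to the library.
    qa_prompt = PromptStub(PROMPT_TEMPLATE.replace("{reply_language}", reply_language))

    assert "{reply_language}" not in qa_prompt.template  # resolved by the app
    assert "{query_str}" in qa_prompt.template           # left for llama_index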
presets.py (CHANGED)

@@ -49,6 +49,13 @@ MODELS = [
     "gpt-4-32k-0314",
 ] # 可选的模型

+REPLY_LANGUAGES = [
+    "中文",
+    "English",
+    "日本語",
+    "跟随问题语言(不稳定)"
+]
+

 WEBSEARCH_PTOMPT_TEMPLATE = """\
 Web search results:
@@ -58,7 +65,8 @@ Current date: {current_date}

 Instructions: Using the provided web search results, write a comprehensive reply to the given query. Make sure to cite results using [[number](URL)] notation after the reference. If the provided search results refer to multiple subjects with the same name, write separate answers for each subject.
 Query: {query}
-Reply in 中文"""
+Reply in {reply_language}
+"""

 PROMPT_TEMPLATE = """\
 Context information is below.
@@ -71,7 +79,7 @@ Make sure to cite results using [number] notation after the reference.
 If the provided context information refer to multiple subjects with the same name, write separate answers for each subject.
 Use prior knowledge only if the given context didn't provide enough information.
 Answer the question: {query_str}
-Reply in 中文
+Reply in {reply_language}
 """

 REFINE_TEMPLATE = """\
@@ -83,6 +91,6 @@ We have the opportunity to refine the existing answer
 {context_msg}
 ------------
 Given the new context, refine the original answer to better
-Answer in the same language as the question, such as English, 中文, 日本語, Español, Français, or Deutsch.
+Reply in {reply_language}
 If the context isn't useful, return the original answer.
 """
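
All three templates now end with the same Reply in {reply_language} line, so whether a request goes through web search, a fresh index query, or an answer refinement, the language instruction lands in the final prompt. A small sketch that checks this invariant over abbreviated copies of the templates (the full template text is shortened here to the relevant lines):

    REPLY_LANGUAGES = ["中文", "English", "日本語", "跟随问题语言(不稳定)"]

    # Abbreviated copies of the three templates above.
    TEMPLATES = {
        "WEBSEARCH_PTOMPT_TEMPLATE": "Query: {query}\nReply in {reply_language}\n",
        "PROMPT_TEMPLATE": "Answer the question: {query_str}\nReply in {reply_language}\n",
        "REFINE_TEMPLATE": "Reply in {reply_language}\nIf the context isn't useful, return the original answer.\n",
    }

    for name, tmpl in TEMPLATES.items():
        # Every template must carry the slot, or the UI choice is silently ignored.
        assert "{reply_language}" in tmpl, f"{name} is missing the placeholder"
        print(name, "->", tmpl.replace("{reply_language}", REPLY_LANGUAGES[1]).strip().splitlines()[-1])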