Spaces:

xl2533
/

FinDoc

Sleeping

App Files Files Community

xl2533 committed on Mar 22, 2023

Commit

d067a6a

1 Parent(s): ed98174

change question and summary in chinese

Browse files

Files changed (5) hide show

app.py +37 -10
prompts/__init__.py +15 -1
prompts/qa_sys_prompt.txt +11 -0
prompts/qa_user_prompt.txt +3 -0
prompts/summary_prompt.txt +7 -0

app.py CHANGED Viewed

@@ -15,9 +15,11 @@ from langchain.prompts.chat import (
     SystemMessagePromptTemplate,
     HumanMessagePromptTemplate,
 )
-from langchain.chains.summarize import load_summarize_chain
 from langchain.chains import QAGenerationChain
 # Streaming endpoint
 API_URL = "https://api.openai.com/v1/chat/completions"
@@ -44,19 +46,44 @@ def process(files, openai_api_key, max_tokens, model, n_sample):
 def get_question(docs, openai_api_key, max_tokens, n_sample=5):
     # Pre-change version: builds one numbered question line per sampled
     # entry of docs and returns the lines joined with newlines.
     q_list = []
     llm = ChatOpenAI(openai_api_key=openai_api_key, max_tokens=max_tokens, temperature=0)
-    chain = QAGenerationChain.from_llm(llm)
     print('Generating Question from template')
     # NOTE(review): docs[i] is read for every i in range(n_sample); if docs is
     # a list with fewer than n_sample entries this raises IndexError —
     # confirm the caller always supplies enough documents.
     for i in range(n_sample):
         # Only the first item of the chain result is used; its 'question'
         # key is read below.
         qa = chain.run(docs[i].page_content)[0]
         print(qa)
-        q_list.append(f"问题{i+1}: {qa['question']}" )
     return '\n'.join(q_list)
-def get_summary(docs, openai_api_key, max_tokens, n_sample=5):
     # Pre-change version: summarises the first n_sample entries of docs with
     # the chain returned by load_summarize_chain(chain_type="map_reduce"),
     # prints the result and returns it.
     llm = ChatOpenAI(openai_api_key=openai_api_key, max_tokens=max_tokens)
-    chain = load_summarize_chain(llm, chain_type="map_reduce")
     print('Generating Summary from tempalte')
     # Slicing (docs[:n_sample]) tolerates docs shorter than n_sample.
     summary = chain.run(docs[:n_sample])
     print(summary)
     return summary
@@ -127,8 +154,8 @@ with gr.Blocks(css="""#col_container {width: 1000px; margin-left: auto; margin-r
                 run = gr.Button('研报解读')
             with gr.Column():
-                summary = gr.Textbox(type='text', label="本文摘要")
-                question = gr.Textbox(type='text', label='提问问题')
         chatbot = gr.Chatbot(elem_id='chatbot')
         inputs = gr.Textbox(placeholder="这篇文档是关于什么的", label="针对文档你有哪些问题？")
@@ -143,8 +170,8 @@ with gr.Blocks(css="""#col_container {width: 1000px; margin-left: auto; margin-r
                   [inputs, openai_api_key, max_tokens, model, chat_counter, chatbot, state],
                   [chatbot, state, chat_counter], )
     start.click(predict,
-              [inputs, openai_api_key, max_tokens, model, chat_counter, chatbot, state],
-              [chatbot, state, chat_counter], )
     # 每次对话结束都重置对话
     clear.click(reset_textbox, [], [inputs], queue=False)

     SystemMessagePromptTemplate,
     HumanMessagePromptTemplate,
 )
+from langchain.prompts import PromptTemplate
+from langchain.chains.llm import LLMChain
+from langchain.chains.combine_documents.map_reduce import MapReduceDocumentsChain
 from langchain.chains import QAGenerationChain
+from langchain.chains.combine_documents.stuff import StuffDocumentsChain
 # Streaming endpoint
 API_URL = "https://api.openai.com/v1/chat/completions"
 def get_question(docs, openai_api_key, max_tokens, n_sample=5):
     # Builds one numbered question line per sampled entry of docs and
     # returns the lines joined with newlines. Questions are produced by a
     # QAGenerationChain driven by the 'qa_sys_template' / 'qa_user_template'
     # entries of MyTemplate.
     q_list = []
     llm = ChatOpenAI(openai_api_key=openai_api_key, max_tokens=max_tokens, temperature=0)
+    # Generate question/answer pairs from the document
+    prompt = ChatPromptTemplate.from_messages(
+        [
+            SystemMessagePromptTemplate.from_template(MyTemplate['qa_sys_template']),
+            HumanMessagePromptTemplate.from_template(MyTemplate['qa_user_template']),
+        ]
+    )
+    chain = QAGenerationChain.from_llm(llm, prompt=prompt)
     print('Generating Question from template')
     # NOTE(review): docs[i] is read for every i in range(n_sample); if docs is
     # a list with fewer than n_sample entries this raises IndexError —
     # confirm the caller always supplies enough documents.
     for i in range(n_sample):
         # Only the first item of the chain result is used; its 'question'
         # key is read below (the answer, if any, is discarded here).
         qa = chain.run(docs[i].page_content)[0]
         print(qa)
+        q_list.append(f"问题{i + 1}: {qa['question']}")
     return '\n'.join(q_list)
+def get_summary(docs, openai_api_key, max_tokens, n_sample=5, verbose=None):
     # Summarises the first n_sample entries of docs with a hand-assembled
     # MapReduceDocumentsChain, prints the result and returns it. The verbose
     # argument is forwarded unchanged to every chain constructed below.
     llm = ChatOpenAI(openai_api_key=openai_api_key, max_tokens=max_tokens)
+    # chain = load_summarize_chain(llm, chain_type="map_reduce")
+    # summary = chain.run(docs[:n_sample])
     # NOTE(review): 'tempalte' in the message below is a typo for 'template';
     # left as-is because it is a runtime string.
     print('Generating Summary from tempalte')
     # The map step and the combine step use the same 'summary_template'
     # text, each with the single input variable "text".
+    map_prompt = PromptTemplate(template=MyTemplate['summary_template'], input_variables=["text"])
+    combine_prompt = PromptTemplate(template=MyTemplate['summary_template'], input_variables=["text"])
+    map_chain = LLMChain(llm=llm, prompt=map_prompt, verbose=verbose)
+    reduce_chain = LLMChain(llm=llm, prompt=combine_prompt, verbose=verbose)
     # The reduce step is wrapped in a StuffDocumentsChain that feeds its
     # documents into the prompt's "text" variable.
+    combine_document_chain = StuffDocumentsChain(
+        llm_chain=reduce_chain,
+        document_variable_name='text',
+        verbose=verbose,
+    )
     # No separate collapse chain is configured (collapse_document_chain=None).
+    chain = MapReduceDocumentsChain(
+        llm_chain=map_chain,
+        combine_document_chain=combine_document_chain,
+        document_variable_name='text',
+        collapse_document_chain=None,
+        verbose=verbose
+    )
     # Slicing (docs[:n_sample]) tolerates docs shorter than n_sample.
     summary = chain.run(docs[:n_sample])
     print(summary)
     return summary
                 run = gr.Button('研报解读')
             with gr.Column():
+                summary = gr.Textbox(type='text', label="一眼看尽 - 文档概览")
+                question = gr.Textbox(type='text', label='推荐问题 - 问别的也行哟')
         chatbot = gr.Chatbot(elem_id='chatbot')
         inputs = gr.Textbox(placeholder="这篇文档是关于什么的", label="针对文档你有哪些问题？")
                   [inputs, openai_api_key, max_tokens, model, chat_counter, chatbot, state],
                   [chatbot, state, chat_counter], )
     start.click(predict,
+                [inputs, openai_api_key, max_tokens, model, chat_counter, chatbot, state],
+                [chatbot, state, chat_counter], )
     # 每次对话结束都重置对话
     clear.click(reset_textbox, [], [inputs], queue=False)

prompts/__init__.py CHANGED Viewed

@@ -14,9 +14,23 @@ with open("prompts/chat_reduce_prompt.txt", "r") as f:
     chat_reduce_template = f.read()
 MyTemplate ={
     'chat_reduce_template': chat_reduce_template,
     'chat_combine_template': chat_combine_template,
     'template_hist': template_hist,
-    'template':template
 }

     chat_reduce_template = f.read()
+with open("prompts/qa_sys_prompt.txt", "r") as f:
     # NOTE(review): the prompt files here are opened without an explicit
     # encoding, so the platform default applies — consider encoding="utf-8".
     # The relative paths are resolved against the current working directory.
+    qa_sys_prompt = f.read()
+with open("prompts/qa_user_prompt.txt", "r") as f:
+    qa_user_prompt = f.read()
+with open("prompts/summary_prompt.txt", "r") as f:
+    summary_prompt = f.read()
 # Lookup table from template name to the raw template text read above;
 # the three keys added last expose the QA and summary prompt files.
 MyTemplate ={
     'chat_reduce_template': chat_reduce_template,
     'chat_combine_template': chat_combine_template,
     'template_hist': template_hist,
+    'template':template,
+    'qa_sys_template': qa_sys_prompt,
+    'qa_user_template': qa_user_prompt,
+    'summary_template': summary_prompt
 }

prompts/qa_sys_prompt.txt ADDED Viewed

	@@ -0,0 +1,11 @@

+You are a smart assistant designed to help high school teachers come up with reading comprehension questions.
+Given a piece of text, you must come up with a question and answer pair that can be used to test a student's reading comprehension abilities.
+When coming up with this question/answer pair, you must respond in the following format, and always respond in Chinese:
+```
+{{
+    "question": "$YOUR_QUESTION_HERE",
+    "answer": "$THE_ANSWER_HERE"
+}}
+```
+Everything between the ``` must be valid json.

prompts/qa_user_prompt.txt ADDED Viewed

	@@ -0,0 +1,3 @@

+Please come up with a question/answer pair in Chinese, in the specified JSON format, for the following text:
+----------------
+{text}

prompts/summary_prompt.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+Write a concise summary of the following in Chinese:
+"{text}"
+CONCISE SUMMARY: