Spaces:

Jiangxz
/

Taiwan_Tax_KB

Runtime error

App Files

Jiangxz committed on Sep 30, 2024

Commit

f59d0de

verified ·

1 Parent(s): 03a05f9

Upload app.py

Browse files

Files changed (1) hide show

app.py +14 -16

app.py CHANGED Viewed

@@ -17,13 +17,6 @@ import gradio as gr
 import re
 import time
-def initialize_llm(api_key):
-    return ChatGroq(
-        groq_api_key=api_key,
-        model_name='llama-3.1-70b-versatile'
-    )
-print(f"成功初始化大型語言模型（LLM）")
 def load_documents(sources):
     documents = []
     for source in sources:
@@ -66,7 +59,7 @@ print(f"\n成功載入 {len(documents)} 個網址或檔案")
 text_splitter = RecursiveCharacterTextSplitter(
     chunk_size=512,
-    chunk_overlap=52,
     length_function=len,
     is_separator_regex=False,
     separators=["\n\n\n","\n\n", "\n", "。"]
@@ -76,7 +69,7 @@ split_docs = text_splitter.split_documents(documents)
 print(f"分割後的文件數量：{len(split_docs)}")
 embeddings = HuggingFaceEmbeddings(model_name="intfloat/multilingual-e5-large")
-print(f"\n成功初始化微軟嵌入模型")
 print(f"\n開始建立向量資料庫")
 vectorstore = Chroma.from_documents(split_docs, embeddings, persist_directory="./Knowledge-base")
@@ -90,7 +83,7 @@ retriever = vectorstore.as_retriever(
         "lambda_mult": 0.8
     }
 )
-print(f"成功建立檢索器，搜尋演算法：Maximum Marginal Relevance Retrieval")
 template = """Let's work this out in a step by step way to be sure we have the right answer. Must reply to me in Taiwanese Traditional Chinese.
 在回答之前，請仔細分析檢索到的上下文，確保你的回答準確完整反映了上下文中的訊息，而不是依賴先前的知識，在回應的答案中絕對不要提到是根據上下文回答。
@@ -106,7 +99,6 @@ template = """Let's work this out in a step by step way to be sure we have the r
 PROMPT = PromptTemplate(
     template=template, input_variables=["context", "question"]
 )
-print(f"成功定義 Prompt Template")
 def create_chain(llm):
     return RetrievalQA.from_chain_type(
@@ -118,16 +110,22 @@ def create_chain(llm):
     )
 print(f"成功建立 RAG Chain")
 def generate_insight_questions(query, api_key):
     llm = initialize_llm(api_key)
-    prompt = f"""Let's work this out in a step by step way to be sure we have the right answer. Must reply to me in Taiwanese Traditional Chinese.
     根據以下回答，生成3個相關的洞察問題：
     原始問題： {query}
     請提供3個簡短但有深度的問題，這些問題應該符合：
     1. 與原始問題緊密相關
-    2. 重新準確描述原始問題
     3. 引導更深入的解決原始問題
     請直接列出這3個問題，每個問題一行，不要添加編號或其他文字。
@@ -147,7 +145,7 @@ def generate_insight_questions(query, api_key):
 def answer_question(query, api_key):
     try:
-        gr.Info("檢索地方稅知識庫中，請稍待片刻......")
         llm = initialize_llm(api_key)
         chain = create_chain(llm)
         result = chain.invoke({"query": query})
@@ -197,7 +195,7 @@ def handle_interaction(query, api_key, state):
     while len(insight_questions) < 3:
         insight_questions.append("提供更多地方稅資訊")
     end_time = time.time()
-    gr.Info(f"Model 已完成回覆，總執行時間： {(end_time - start_time):.2f} 秒。")
     return answer, insight_questions[0], insight_questions[1], insight_questions[2], state, query
 custom_css = """
@@ -280,7 +278,7 @@ with gr.Blocks(theme=gr.themes.Monochrome(), css=custom_css) as iface:
     > ### **※ RAG-based 系統部署：江信宗，LLM：Llama-3.1-70B，以地方稅極少知識資料示範，僅供參考，準確資訊請依據地方稅稽徵機關回覆為準。**
     """, elem_classes="center-text")
     with gr.Row():
-        query_input = gr.Textbox(label="輸入您的問題，系統將基於學習到的知識資料提供相關答案。", placeholder="請輸入您的問題（支援同時輸入多個問題，例如：問題1？問題2？）", scale=3, elem_classes="query-input")
         api_key_input = gr.Textbox(label="請輸入您的 API Key", type="password", placeholder="API authentication key", scale=1, elem_classes="api-key-input")
     answer_output = gr.Textbox(label="答案：", max_lines=40, elem_classes="answer-box")
     with gr.Row():

 import re
 import time
 def load_documents(sources):
     documents = []
     for source in sources:
 text_splitter = RecursiveCharacterTextSplitter(
     chunk_size=512,
+    chunk_overlap=50,
     length_function=len,
     is_separator_regex=False,
     separators=["\n\n\n","\n\n", "\n", "。"]
 print(f"分割後的文件數量：{len(split_docs)}")
 embeddings = HuggingFaceEmbeddings(model_name="intfloat/multilingual-e5-large")
+print(f"\n成功初始化 Microsoft 嵌入模型")
 print(f"\n開始建立向量資料庫")
 vectorstore = Chroma.from_documents(split_docs, embeddings, persist_directory="./Knowledge-base")
         "lambda_mult": 0.8
     }
 )
+print(f"檢索演算法：Maximum Marginal Relevance Retrieval")
 template = """Let's work this out in a step by step way to be sure we have the right answer. Must reply to me in Taiwanese Traditional Chinese.
 在回答之前，請仔細分析檢索到的上下文，確保你的回答準確完整反映了上下文中的訊息，而不是依賴先前的知識，在回應的答案中絕對不要提到是根據上下文回答。
 PROMPT = PromptTemplate(
     template=template, input_variables=["context", "question"]
 )
 def create_chain(llm):
     return RetrievalQA.from_chain_type(
     )
 print(f"成功建立 RAG Chain")
+def initialize_llm(api_key):
+    return ChatGroq(
+        groq_api_key=api_key,
+        model_name='llama-3.1-70b-versatile'
+    )
 def generate_insight_questions(query, api_key):
     llm = initialize_llm(api_key)
+    prompt = f"""Let's work this out in a step by step way to be sure we have the right answer. Must reply to me in "Traditional Chinese".
     根據以下回答，生成3個相關的洞察問題：
     原始問題： {query}
     請提供3個簡短但有深度的問題，這些問題應該符合：
     1. 與原始問題緊密相關
+    2. 準確重新描述原始問題
     3. 引導更深入的解決原始問題
     請直接列出這3個問題，每個問題一行，不要添加編號或其他文字。
 def answer_question(query, api_key):
     try:
+        gr.Info("檢索地方稅知識庫中......")
         llm = initialize_llm(api_key)
         chain = create_chain(llm)
         result = chain.invoke({"query": query})
     while len(insight_questions) < 3:
         insight_questions.append("提供更多地方稅資訊")
     end_time = time.time()
+    gr.Info(f"Model 已答覆，執行時間： {(end_time - start_time):.2f} 秒。")
     return answer, insight_questions[0], insight_questions[1], insight_questions[2], state, query
 custom_css = """
     > ### **※ RAG-based 系統部署：江信宗，LLM：Llama-3.1-70B，以地方稅極少知識資料示範，僅供參考，準確資訊請依據地方稅稽徵機關回覆為準。**
     """, elem_classes="center-text")
     with gr.Row():
+        query_input = gr.Textbox(label="輸入您的問題，系統將基於學習到的知識資料提供相關答案。", placeholder="請輸入您的問題（支援同時輸入多個問題，例如：問題1？問題2？）", scale=3, max_lines=5, elem_classes="query-input")
         api_key_input = gr.Textbox(label="請輸入您的 API Key", type="password", placeholder="API authentication key", scale=1, elem_classes="api-key-input")
     answer_output = gr.Textbox(label="答案：", max_lines=40, elem_classes="answer-box")
     with gr.Row():