Spaces:

Jiangxz
/

Taiwan_Tax_KB

Runtime error

App Files

Jiangxz committed on Sep 24, 2024

Commit

94d5fbd

verified ·

1 Parent(s): 33db524

Upload app.py

Browse files

Files changed (1) hide show

app.py +14 -10

app.py CHANGED Viewed

@@ -4,7 +4,6 @@
 import os
 from dotenv import load_dotenv
 load_dotenv()
-os.environ["LANGCHAIN_COMMUNITY__USER_AGENT"] = "Taiwan_Tax_Knowledge-base"
 from langchain_community.utils import user_agent
 from langchain_groq import ChatGroq
 from langchain.text_splitter import RecursiveCharacterTextSplitter
@@ -18,7 +17,6 @@ import gradio as gr
 import re
 def initialize_llm(api_key):
-    os.environ["GROQ_API_KEY"] = api_key
     return ChatGroq(
         groq_api_key=api_key,
         model_name='llama-3.1-70b-versatile'
@@ -67,8 +65,9 @@ print(f"\n成功載入 {len(documents)} 個網址或檔案")
 text_splitter = RecursiveCharacterTextSplitter(
     chunk_size=512,
-    chunk_overlap=50,
     length_function=len,
     separators=["\n\n\n","\n\n", "\n", "。"]
 )
@@ -76,7 +75,7 @@ split_docs = text_splitter.split_documents(documents)
 print(f"分割後的文件數量：{len(split_docs)}")
 embeddings = HuggingFaceEmbeddings(model_name="intfloat/multilingual-e5-large")
-print(f"\n成功初始化嵌入模型")
 print(f"\n開始建立向量資料庫")
 vectorstore = Chroma.from_documents(split_docs, embeddings, persist_directory="./Knowledge-base")
@@ -93,8 +92,8 @@ retriever = vectorstore.as_retriever(
 print(f"成功建立檢索器，搜尋演算法：Maximum Marginal Relevance Retrieval")
 template = """Let's work this out in a step by step way to be sure we have the right answer. Must reply to me in Taiwanese Traditional Chinese.
-在回答之前，請仔細分析檢索到的上下文，確保你的回答準確完整反映了上下文中的訊息，而不是依賴先前的知識，在回應的答案中不要提到是根據上下文回答。
-如果檢索到的多個上下文之間存在聯繫，請整合這些訊息以提供全面的回答，但要避免過度推斷。
 如果檢索到的上下文不包含足夠回答問題的訊息，請誠實的說明，不要試圖編造答案。
 上下文： {context}
@@ -127,8 +126,8 @@ def generate_insight_questions(query, api_key):
     請提供3個簡短但有深度的問題，這些問題應該符合：
     1. 與原始問題緊密相關
-    2. 能夠引導原始問題更深入的解決問題
-    3. 涵蓋與原始問題不同的面向或角度
     請直接列出這3個問題，每個問題一行，不要添加編號或其他文字。
     """
@@ -149,7 +148,7 @@ def answer_question(query, api_key):
     try:
         llm = initialize_llm(api_key)
         chain = create_chain(llm)
-        result = chain({"query": query})
         answer = result["result"]
         insight_questions = generate_insight_questions(query, api_key)
         while len(insight_questions) < 3:
@@ -186,6 +185,8 @@ def convert_punctuation(text):
 def handle_interaction(query, api_key, state):
     if state is None:
         state = {"history": []}
     query = convert_punctuation(query)
     answer, insight_questions = answer_multiple_questions(query, api_key)
     state["history"].append((query, answer))
@@ -286,4 +287,7 @@ with gr.Blocks(theme=gr.themes.Monochrome(), css=custom_css) as iface:
         )
 if __name__ == "__main__":
-    iface.launch(share=True, debug=True)

 import os
 from dotenv import load_dotenv
 load_dotenv()
 from langchain_community.utils import user_agent
 from langchain_groq import ChatGroq
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 import re
 def initialize_llm(api_key):
     return ChatGroq(
         groq_api_key=api_key,
         model_name='llama-3.1-70b-versatile'
 text_splitter = RecursiveCharacterTextSplitter(
     chunk_size=512,
+    chunk_overlap=64,
     length_function=len,
+    is_separator_regex=False,
     separators=["\n\n\n","\n\n", "\n", "。"]
 )
 print(f"分割後的文件數量：{len(split_docs)}")
 embeddings = HuggingFaceEmbeddings(model_name="intfloat/multilingual-e5-large")
+print(f"\n成功初始化微軟嵌入模型")
 print(f"\n開始建立向量資料庫")
 vectorstore = Chroma.from_documents(split_docs, embeddings, persist_directory="./Knowledge-base")
 print(f"成功建立檢索器，搜尋演算法：Maximum Marginal Relevance Retrieval")
 template = """Let's work this out in a step by step way to be sure we have the right answer. Must reply to me in Taiwanese Traditional Chinese.
+在回答之前，請仔細分析檢索到的上下文，確保你的回答準確完整反映了上下文中的訊息，而不是依賴先前的知識，在回應的答案中絕對不要提到是根據上下文回答。
+如果檢索到的多個上下文之間存在聯繫，請整合這些訊息以提供更全面的回答，但要避免過度推斷。
 如果檢索到的上下文不包含足夠回答問題的訊息，請誠實的說明，不要試圖編造答案。
 上下文： {context}
     請提供3個簡短但有深度的問題，這些問題應該符合：
     1. 與原始問題緊密相關
+    2. 重新準確描述原始問題
+    3. 引導更深入的解決原始問題
     請直接列出這3個問題，每個問題一行，不要添加編號或其他文字。
     """
     try:
         llm = initialize_llm(api_key)
         chain = create_chain(llm)
+        result = chain.invoke({"query": query})
         answer = result["result"]
         insight_questions = generate_insight_questions(query, api_key)
         while len(insight_questions) < 3:
 def handle_interaction(query, api_key, state):
     if state is None:
         state = {"history": []}
+    if not api_key:
+        api_key = os.getenv("Llama70B_Key")
     query = convert_punctuation(query)
     answer, insight_questions = answer_multiple_questions(query, api_key)
     state["history"].append((query, answer))
         )
 if __name__ == "__main__":
+    if "SPACE_ID" in os.environ:
+        iface.launch()
+    else:
+        iface.launch(share=True)