Jiangxz committed on
Commit
94d5fbd
·
verified ·
1 Parent(s): 33db524

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -10
app.py CHANGED
@@ -4,7 +4,6 @@
4
  import os
5
  from dotenv import load_dotenv
6
  load_dotenv()
7
- os.environ["LANGCHAIN_COMMUNITY__USER_AGENT"] = "Taiwan_Tax_Knowledge-base"
8
  from langchain_community.utils import user_agent
9
  from langchain_groq import ChatGroq
10
  from langchain.text_splitter import RecursiveCharacterTextSplitter
@@ -18,7 +17,6 @@ import gradio as gr
18
  import re
19
 
20
  def initialize_llm(api_key):
21
- os.environ["GROQ_API_KEY"] = api_key
22
  return ChatGroq(
23
  groq_api_key=api_key,
24
  model_name='llama-3.1-70b-versatile'
@@ -67,8 +65,9 @@ print(f"\n成功載入 {len(documents)} 個網址或檔案")
67
 
68
  text_splitter = RecursiveCharacterTextSplitter(
69
  chunk_size=512,
70
- chunk_overlap=50,
71
  length_function=len,
 
72
  separators=["\n\n\n","\n\n", "\n", "。"]
73
  )
74
 
@@ -76,7 +75,7 @@ split_docs = text_splitter.split_documents(documents)
76
  print(f"分割後的文件數量:{len(split_docs)}")
77
 
78
  embeddings = HuggingFaceEmbeddings(model_name="intfloat/multilingual-e5-large")
79
- print(f"\n成功初始化嵌入模型")
80
 
81
  print(f"\n開始建立向量資料庫")
82
  vectorstore = Chroma.from_documents(split_docs, embeddings, persist_directory="./Knowledge-base")
@@ -93,8 +92,8 @@ retriever = vectorstore.as_retriever(
93
  print(f"成功建立檢索器,搜尋演算法:Maximum Marginal Relevance Retrieval")
94
 
95
  template = """Let's work this out in a step by step way to be sure we have the right answer. Must reply to me in Taiwanese Traditional Chinese.
96
- 在回答之前,請仔細分析檢索到的上下文,確保你的回答準確完整反映了上下文中的訊息,而不是依賴先前的知識,在回應的答案中不要提到是根據上下文回答。
97
- 如果檢索到的多個上下文之間存在聯繫,請整合這些訊息以提供全面的回答,但要避免過度推斷。
98
  如果檢索到的上下文不包含足夠回答問題的訊息,請誠實的說明,不要試圖編造答案。
99
 
100
  上下文: {context}
@@ -127,8 +126,8 @@ def generate_insight_questions(query, api_key):
127
 
128
  請提供3個簡短但有深度的問題,這些問題應該符合:
129
  1. 與原始問題緊密相關
130
- 2. 能夠引導原始問題更深入的解決問題
131
- 3. 涵蓋與原始問題不同的面向或角度
132
 
133
  請直接列出這3個問題,每個問題一行,不要添加編號或其他文字。
134
  """
@@ -149,7 +148,7 @@ def answer_question(query, api_key):
149
  try:
150
  llm = initialize_llm(api_key)
151
  chain = create_chain(llm)
152
- result = chain({"query": query})
153
  answer = result["result"]
154
  insight_questions = generate_insight_questions(query, api_key)
155
  while len(insight_questions) < 3:
@@ -186,6 +185,8 @@ def convert_punctuation(text):
186
  def handle_interaction(query, api_key, state):
187
  if state is None:
188
  state = {"history": []}
 
 
189
  query = convert_punctuation(query)
190
  answer, insight_questions = answer_multiple_questions(query, api_key)
191
  state["history"].append((query, answer))
@@ -286,4 +287,7 @@ with gr.Blocks(theme=gr.themes.Monochrome(), css=custom_css) as iface:
286
  )
287
 
288
  if __name__ == "__main__":
289
- iface.launch(share=True, debug=True)
 
 
 
 
4
  import os
5
  from dotenv import load_dotenv
6
  load_dotenv()
 
7
  from langchain_community.utils import user_agent
8
  from langchain_groq import ChatGroq
9
  from langchain.text_splitter import RecursiveCharacterTextSplitter
 
17
  import re
18
 
19
  def initialize_llm(api_key):
 
20
  return ChatGroq(
21
  groq_api_key=api_key,
22
  model_name='llama-3.1-70b-versatile'
 
65
 
66
  text_splitter = RecursiveCharacterTextSplitter(
67
  chunk_size=512,
68
+ chunk_overlap=64,
69
  length_function=len,
70
+ is_separator_regex=False,
71
  separators=["\n\n\n","\n\n", "\n", "。"]
72
  )
73
 
 
75
  print(f"分割後的文件數量:{len(split_docs)}")
76
 
77
  embeddings = HuggingFaceEmbeddings(model_name="intfloat/multilingual-e5-large")
78
+ print(f"\n成功初始化微軟嵌入模型")
79
 
80
  print(f"\n開始建立向量資料庫")
81
  vectorstore = Chroma.from_documents(split_docs, embeddings, persist_directory="./Knowledge-base")
 
92
  print(f"成功建立檢索器,搜尋演算法:Maximum Marginal Relevance Retrieval")
93
 
94
  template = """Let's work this out in a step by step way to be sure we have the right answer. Must reply to me in Taiwanese Traditional Chinese.
95
+ 在回答之前,請仔細分析檢索到的上下文,確保你的回答準確完整反映了上下文中的訊息,而不是依賴先前的知識,在回應的答案中絕對不要提到是根據上下文回答。
96
+ 如果檢索到的多個上下文之間存在聯繫,請整合這些訊息以提供更全面的回答,但要避免過度推斷。
97
  如果檢索到的上下文不包含足夠回答問題的訊息,請誠實的說明,不要試圖編造答案。
98
 
99
  上下文: {context}
 
126
 
127
  請提供3個簡短但有深度的問題,這些問題應該符合:
128
  1. 與原始問題緊密相關
129
+ 2. 重新準確描述原始問題
130
+ 3. 引導更深入的解決原始問題
131
 
132
  請直接列出這3個問題,每個問題一行,不要添加編號或其他文字。
133
  """
 
148
  try:
149
  llm = initialize_llm(api_key)
150
  chain = create_chain(llm)
151
+ result = chain.invoke({"query": query})
152
  answer = result["result"]
153
  insight_questions = generate_insight_questions(query, api_key)
154
  while len(insight_questions) < 3:
 
185
  def handle_interaction(query, api_key, state):
186
  if state is None:
187
  state = {"history": []}
188
+ if not api_key:
189
+ api_key = os.getenv("Llama70B_Key")
190
  query = convert_punctuation(query)
191
  answer, insight_questions = answer_multiple_questions(query, api_key)
192
  state["history"].append((query, answer))
 
287
  )
288
 
289
  if __name__ == "__main__":
290
+ if "SPACE_ID" in os.environ:
291
+ iface.launch()
292
+ else:
293
+ iface.launch(share=True)